#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.242 1998/05/06 15:30:30 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 
for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } else { 294 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 295 } 296 } 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNC__ 301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 303 { 304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 305 MPI_Comm comm = mat->comm; 306 int size = aij->size, *owners = 
aij->rowners; 307 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 308 MPI_Request *send_waits,*recv_waits; 309 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 310 InsertMode addv; 311 Scalar *rvalues,*svalues; 312 313 PetscFunctionBegin; 314 /* make sure all processors are either in INSERTMODE or ADDMODE */ 315 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 316 if (addv == (ADD_VALUES|INSERT_VALUES)) { 317 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 318 } 319 mat->insertmode = addv; /* in case this processor had no cache */ 320 321 /* first count number of contributors to each processor */ 322 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 324 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 325 for ( i=0; i<aij->stash.n; i++ ) { 326 idx = aij->stash.idx[i]; 327 for ( j=0; j<size; j++ ) { 328 if (idx >= owners[j] && idx < owners[j+1]) { 329 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 330 } 331 } 332 } 333 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 334 335 /* inform other processors of number of messages and max length*/ 336 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 337 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 338 nreceives = work[rank]; 339 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 340 nmax = work[rank]; 341 PetscFree(work); 342 343 /* post receives: 344 1) each message will consist of ordered pairs 345 (global index,value) we store the global index as a double 346 to simplify the message passing. 347 2) since we don't know how long each individual message is we 348 allocate the largest needed buffer for each receive. Potentially 349 this is a lot of wasted space. 350 351 352 This could be done better. 
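       (In this implementation each stashed entry is actually packed as a
       triple of Scalars: global row index, global column index, value.
       That is why 3*nmax Scalars are reserved for each receive buffer
       posted below.)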
353 */ 354 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 355 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 356 for ( i=0; i<nreceives; i++ ) { 357 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 358 comm,recv_waits+i);CHKERRQ(ierr); 359 } 360 361 /* do sends: 362 1) starts[i] gives the starting index in svalues for stuff going to 363 the ith processor 364 */ 365 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 366 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 367 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 368 starts[0] = 0; 369 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 370 for ( i=0; i<aij->stash.n; i++ ) { 371 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 372 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 373 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 374 } 375 PetscFree(owner); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 382 } 383 } 384 PetscFree(starts); PetscFree(nprocs); 385 386 /* Free cache space */ 387 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 388 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 389 390 aij->svalues = svalues; aij->rvalues = rvalues; 391 aij->nsends = nsends; aij->nrecvs = nreceives; 392 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 393 aij->rmax = nmax; 394 395 PetscFunctionReturn(0); 396 } 397 extern int MatSetUpMultiply_MPIAIJ(Mat); 398 399 #undef __FUNC__ 400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 402 { 403 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 404 MPI_Status *send_status,recv_status; 405 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 406 int row,col,other_disassembled; 407 Scalar *values,val; 408 InsertMode addv = mat->insertmode; 409 410 PetscFunctionBegin; 411 /* wait on receives */ 412 while (count) { 413 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 414 /* unpack receives into our local space */ 415 values = aij->rvalues + 3*imdex*aij->rmax; 416 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 417 n = n/3; 418 for ( i=0; i<n; i++ ) { 419 row = (int) PetscReal(values[3*i]) - aij->rstart; 420 col = (int) PetscReal(values[3*i+1]); 421 val = values[3*i+2]; 422 if (col >= aij->cstart && col < aij->cend) { 423 col -= aij->cstart; 424 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 425 } else { 426 if (mat->was_assembled || mat->assembled) { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 429 } 430 col = aij->colmap[col] - 1; 431 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 432 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 433 col = (int) PetscReal(values[3*i+1]); 434 } 435 } 436 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 437 } 438 } 439 count--; 440 } 441 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 442 443 /* wait on sends */ 444 if (aij->nsends) { 445 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 446 ierr = 
MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 447 PetscFree(send_status); 448 } 449 PetscFree(aij->send_waits); PetscFree(aij->svalues); 450 451 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 452 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 453 454 /* determine if any processor has disassembled, if so we must 455 also disassemble ourselfs, in order that we may reassemble. */ 456 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 457 if (mat->was_assembled && !other_disassembled) { 458 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 459 } 460 461 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 462 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 463 } 464 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 465 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 466 467 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNC__ 472 #define __FUNC__ "MatZeroEntries_MPIAIJ" 473 int MatZeroEntries_MPIAIJ(Mat A) 474 { 475 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 476 int ierr; 477 478 PetscFunctionBegin; 479 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 480 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNC__ 485 #define __FUNC__ "MatZeroRows_MPIAIJ" 486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 487 { 488 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 489 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 490 int *procs,*nprocs,j,found,idx,nsends,*work,row; 491 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 492 int *rvalues,tag = A->tag,count,base,slen,n,*source; 493 int *lens,imdex,*lrows,*values,rstart=l->rstart; 494 MPI_Comm comm = A->comm; 495 MPI_Request *send_waits,*recv_waits; 496 MPI_Status recv_status,*send_status; 497 IS istmp; 498 PetscTruth localdiag; 499 500 PetscFunctionBegin; 501 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 502 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 503 504 /* first count number of contributors to each processor */ 505 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 506 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 507 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 508 for ( i=0; i<N; i++ ) { 509 idx = rows[i]; 510 found = 0; 511 for ( j=0; j<size; j++ ) { 512 if (idx >= owners[j] && idx < owners[j+1]) { 513 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 514 } 515 } 516 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 517 } 518 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 519 520 /* inform other processors of number of messages and max length*/ 521 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 522 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 523 nrecvs = work[rank]; 524 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 525 nmax = work[rank]; 526 PetscFree(work); 527 528 /* post receives: */ 529 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 530 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 531 for ( i=0; i<nrecvs; i++ ) { 532 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 533 } 534 535 /* do sends: 536 1) starts[i] gives the starting index in svalues for stuff going to 537 the ith processor 538 */ 539 svalues = (int *) 
PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 540 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 541 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 542 starts[0] = 0; 543 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 544 for ( i=0; i<N; i++ ) { 545 svalues[starts[owner[i]]++] = rows[i]; 546 } 547 ISRestoreIndices(is,&rows); 548 549 starts[0] = 0; 550 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 551 count = 0; 552 for ( i=0; i<size; i++ ) { 553 if (procs[i]) { 554 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 555 } 556 } 557 PetscFree(starts); 558 559 base = owners[rank]; 560 561 /* wait on receives */ 562 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 563 source = lens + nrecvs; 564 count = nrecvs; slen = 0; 565 while (count) { 566 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 567 /* unpack receives into our local space */ 568 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 569 source[imdex] = recv_status.MPI_SOURCE; 570 lens[imdex] = n; 571 slen += n; 572 count--; 573 } 574 PetscFree(recv_waits); 575 576 /* move the data into the send scatter */ 577 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 578 count = 0; 579 for ( i=0; i<nrecvs; i++ ) { 580 values = rvalues + i*nmax; 581 for ( j=0; j<lens[i]; j++ ) { 582 lrows[count++] = values[j] - base; 583 } 584 } 585 PetscFree(rvalues); PetscFree(lens); 586 PetscFree(owner); PetscFree(nprocs); 587 588 /* actually zap the local rows */ 589 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 590 PLogObjectParent(A,istmp); 591 592 /* 593 Zero the required rows. If the "diagonal block" of the matrix 594 is square and the user wishes to set the diagonal we use seperate 595 code so that MatSetValues() is not called for each diagonal allocating 596 new memory, thus calling lots of mallocs and slowing things down. 

     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this implementation */
  /* but may not always be true.
*/ 691 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 697 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 698 { 699 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 700 int ierr; 701 702 PetscFunctionBegin; 703 /* do nondiagonal part */ 704 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 705 /* send it on its way */ 706 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 707 /* do local part */ 708 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 709 /* receive remote parts: note this assumes the values are not actually */ 710 /* inserted in yy until the next line, which is true for my implementation*/ 711 /* but is not perhaps always true. */ 712 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 /* 717 This only works correctly for square matrices where the subblock A->A is the 718 diagonal block 719 */ 720 #undef __FUNC__ 721 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 722 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 723 { 724 int ierr; 725 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 726 727 PetscFunctionBegin; 728 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 729 if (a->rstart != a->cstart || a->rend != a->cend) { 730 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 731 } 732 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNC__ 737 #define __FUNC__ "MatScale_MPIAIJ" 738 int MatScale_MPIAIJ(Scalar *aa,Mat A) 739 { 740 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 741 int ierr; 742 743 PetscFunctionBegin; 744 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 745 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 746 PetscFunctionReturn(0); 747 } 748 749 #undef __FUNC__ 750 #define __FUNC__ "MatDestroy_MPIAIJ" 751 int MatDestroy_MPIAIJ(Mat mat) 752 { 753 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 754 int ierr; 755 756 PetscFunctionBegin; 757 #if defined(USE_PETSC_LOG) 758 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 759 #endif 760 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 761 PetscFree(aij->rowners); 762 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 763 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 764 if (aij->colmap) PetscFree(aij->colmap); 765 if (aij->garray) PetscFree(aij->garray); 766 if (aij->lvec) VecDestroy(aij->lvec); 767 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 768 if (aij->rowvalues) PetscFree(aij->rowvalues); 769 PetscFree(aij); 770 PLogObjectDestroy(mat); 771 PetscHeaderDestroy(mat); 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNC__ 776 #define __FUNC__ "MatView_MPIAIJ_Binary" 777 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 778 { 779 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 780 int ierr; 781 782 PetscFunctionBegin; 783 if (aij->size == 1) { 784 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 785 } 786 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNC__ 791 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 792 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 793 { 794 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 795 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 796 int ierr, format,shift = C->indexshift,rank; 797 FILE *fd; 798 ViewerType vtype; 799 800 
PetscFunctionBegin; 801 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 802 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 803 ierr = ViewerGetFormat(viewer,&format); 804 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 805 MatInfo info; 806 int flg; 807 MPI_Comm_rank(mat->comm,&rank); 808 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 809 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 810 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 811 PetscSequentialPhaseBegin(mat->comm,1); 812 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 813 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 814 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 815 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 816 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 817 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 818 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 819 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 820 fflush(fd); 821 PetscSequentialPhaseEnd(mat->comm,1); 822 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 825 PetscFunctionReturn(0); 826 } 827 } 828 829 if (vtype == DRAW_VIEWER) { 830 Draw draw; 831 PetscTruth isnull; 832 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 833 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 834 } 835 836 if (vtype == ASCII_FILE_VIEWER) { 837 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 838 PetscSequentialPhaseBegin(mat->comm,1); 839 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 840 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 841 aij->cend); 842 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 843 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 844 fflush(fd); 845 PetscSequentialPhaseEnd(mat->comm,1); 846 } else { 847 int size = aij->size; 848 rank = aij->rank; 849 if (size == 1) { 850 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 851 } else { 852 /* assemble the entire matrix onto first processor. 
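         This is not memory-scalable (the whole matrix is gathered onto a
         single process), but it is adequate for matrices small enough to be
         printed or drawn in full.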
*/ 853 Mat A; 854 Mat_SeqAIJ *Aloc; 855 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 856 Scalar *a; 857 858 if (!rank) { 859 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 860 } else { 861 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 862 } 863 PLogObjectParent(mat,A); 864 865 /* copy over the A part */ 866 Aloc = (Mat_SeqAIJ*) aij->A->data; 867 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 868 row = aij->rstart; 869 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 870 for ( i=0; i<m; i++ ) { 871 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 872 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 873 } 874 aj = Aloc->j; 875 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 876 877 /* copy over the B part */ 878 Aloc = (Mat_SeqAIJ*) aij->B->data; 879 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 880 row = aij->rstart; 881 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 882 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 883 for ( i=0; i<m; i++ ) { 884 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 885 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 886 } 887 PetscFree(ct); 888 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 889 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 890 /* 891 Everyone has to call to draw the matrix since the graphics waits are 892 synchronized across all processors that share the Draw object 893 */ 894 if (!rank || vtype == DRAW_VIEWER) { 895 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 896 } 897 ierr = MatDestroy(A); CHKERRQ(ierr); 898 } 899 } 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNC__ 904 #define __FUNC__ "MatView_MPIAIJ" 905 int MatView_MPIAIJ(Mat mat,Viewer viewer) 906 { 907 int ierr; 908 ViewerType vtype; 909 910 PetscFunctionBegin; 911 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 912 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 913 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 914 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 915 } else if (vtype == BINARY_FILE_VIEWER) { 916 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 917 } else { 918 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 919 } 920 PetscFunctionReturn(0); 921 } 922 923 /* 924 This has to provide several versions. 925 926 2) a) use only local smoothing updating outer values only once. 927 b) local smoothing updating outer values each inner iteration 928 3) color updating out values betwen colors. 
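      Currently only the local (on-processor) sweeps are implemented; any
      flag requesting a true parallel SOR falls through to an error below.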
929 */ 930 #undef __FUNC__ 931 #define __FUNC__ "MatRelax_MPIAIJ" 932 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 933 double fshift,int its,Vec xx) 934 { 935 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 936 Mat AA = mat->A, BB = mat->B; 937 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 938 Scalar *b,*x,*xs,*ls,d,*v,sum; 939 int ierr,*idx, *diag; 940 int n = mat->n, m = mat->m, i,shift = A->indexshift; 941 942 PetscFunctionBegin; 943 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 944 xs = x + shift; /* shift by one for index start of 1 */ 945 ls = ls + shift; 946 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 947 diag = A->diag; 948 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 949 if (flag & SOR_ZERO_INITIAL_GUESS) { 950 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 954 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 955 while (its--) { 956 /* go down through the rows */ 957 for ( i=0; i<m; i++ ) { 958 n = A->i[i+1] - A->i[i]; 959 idx = A->j + A->i[i] + shift; 960 v = A->a + A->i[i] + shift; 961 sum = b[i]; 962 SPARSEDENSEMDOT(sum,xs,v,idx,n); 963 d = fshift + A->a[diag[i]+shift]; 964 n = B->i[i+1] - B->i[i]; 965 idx = B->j + B->i[i] + shift; 966 v = B->a + B->i[i] + shift; 967 SPARSEDENSEMDOT(sum,ls,v,idx,n); 968 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 969 } 970 /* come up through the rows */ 971 for ( i=m-1; i>-1; i-- ) { 972 n = A->i[i+1] - A->i[i]; 973 idx = A->j + A->i[i] + shift; 974 v = A->a + A->i[i] + shift; 975 sum = b[i]; 976 SPARSEDENSEMDOT(sum,xs,v,idx,n); 977 d = fshift + A->a[diag[i]+shift]; 978 n = B->i[i+1] - B->i[i]; 979 idx = B->j + B->i[i] + shift; 980 v = B->a + B->i[i] + shift; 981 SPARSEDENSEMDOT(sum,ls,v,idx,n); 982 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 983 } 984 } 985 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 986 if (flag & SOR_ZERO_INITIAL_GUESS) { 987 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 991 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 992 while (its--) { 993 for ( i=0; i<m; i++ ) { 994 n = A->i[i+1] - A->i[i]; 995 idx = A->j + A->i[i] + shift; 996 v = A->a + A->i[i] + shift; 997 sum = b[i]; 998 SPARSEDENSEMDOT(sum,xs,v,idx,n); 999 d = fshift + A->a[diag[i]+shift]; 1000 n = B->i[i+1] - B->i[i]; 1001 idx = B->j + B->i[i] + shift; 1002 v = B->a + B->i[i] + shift; 1003 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1004 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1005 } 1006 } 1007 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1008 if (flag & SOR_ZERO_INITIAL_GUESS) { 1009 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1013 mat->Mvctx); CHKERRQ(ierr); 1014 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1015 mat->Mvctx); CHKERRQ(ierr); 1016 while (its--) { 1017 for ( i=m-1; i>-1; i-- ) { 1018 n = A->i[i+1] - A->i[i]; 1019 idx = A->j + A->i[i] + shift; 1020 v = A->a + A->i[i] + shift; 1021 sum = b[i]; 1022 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1023 d = fshift + A->a[diag[i]+shift]; 1024 n = B->i[i+1] - B->i[i]; 1025 idx = B->j + B->i[i] + shift; 1026 v = B->a + B->i[i] + shift; 1027 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1028 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1029 } 1030 } 1031 } else { 1032 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 1037 #undef __FUNC__ 1038 #define __FUNC__ "MatGetInfo_MPIAIJ" 1039 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1040 { 1041 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1042 Mat A = mat->A, B = mat->B; 1043 int ierr; 1044 double isend[5], irecv[5]; 1045 1046 PetscFunctionBegin; 1047 info->block_size = 1.0; 1048 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1049 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1050 isend[3] = info->memory; isend[4] = info->mallocs; 1051 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1052 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1053 isend[3] += info->memory; isend[4] += info->mallocs; 1054 if (flag == MAT_LOCAL) { 1055 info->nz_used = isend[0]; 1056 info->nz_allocated = isend[1]; 1057 info->nz_unneeded = isend[2]; 1058 info->memory = isend[3]; 1059 info->mallocs = isend[4]; 1060 } else if (flag == MAT_GLOBAL_MAX) { 1061 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1062 info->nz_used = irecv[0]; 1063 info->nz_allocated = irecv[1]; 1064 info->nz_unneeded = irecv[2]; 1065 info->memory = irecv[3]; 1066 info->mallocs = irecv[4]; 1067 } else if (flag == MAT_GLOBAL_SUM) { 1068 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1069 info->nz_used = irecv[0]; 1070 info->nz_allocated = irecv[1]; 1071 info->nz_unneeded = irecv[2]; 1072 info->memory = irecv[3]; 1073 info->mallocs = irecv[4]; 1074 } 1075 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1076 info->fill_ratio_needed = 0; 1077 info->factor_mallocs = 0; 1078 info->rows_global = (double)mat->M; 1079 info->columns_global = (double)mat->N; 1080 info->rows_local = (double)mat->m; 1081 info->columns_local = (double)mat->N; 1082 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNC__ 1087 #define __FUNC__ "MatSetOption_MPIAIJ" 1088 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1089 { 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1091 1092 PetscFunctionBegin; 1093 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1094 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1095 op == MAT_COLUMNS_UNSORTED || 1096 op == MAT_COLUMNS_SORTED || 1097 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1098 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1099 MatSetOption(a->A,op); 1100 MatSetOption(a->B,op); 1101 } else if (op == MAT_ROW_ORIENTED) { 1102 a->roworiented = 1; 1103 
MatSetOption(a->A,op); 1104 MatSetOption(a->B,op); 1105 } else if (op == MAT_ROWS_SORTED || 1106 op == MAT_ROWS_UNSORTED || 1107 op == MAT_SYMMETRIC || 1108 op == MAT_STRUCTURALLY_SYMMETRIC || 1109 op == MAT_YES_NEW_DIAGONALS) 1110 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1111 else if (op == MAT_COLUMN_ORIENTED) { 1112 a->roworiented = 0; 1113 MatSetOption(a->A,op); 1114 MatSetOption(a->B,op); 1115 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1116 a->donotstash = 1; 1117 } else if (op == MAT_NO_NEW_DIAGONALS){ 1118 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1119 } else { 1120 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1121 } 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNC__ 1126 #define __FUNC__ "MatGetSize_MPIAIJ" 1127 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1128 { 1129 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1130 1131 PetscFunctionBegin; 1132 if (m) *m = mat->M; 1133 if (n) *n = mat->N; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNC__ 1138 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1139 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1140 { 1141 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1142 1143 PetscFunctionBegin; 1144 if (m) *m = mat->m; 1145 if (n) *n = mat->n; 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNC__ 1150 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1151 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1152 { 1153 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1154 1155 PetscFunctionBegin; 1156 *m = mat->rstart; *n = mat->rend; 1157 PetscFunctionReturn(0); 1158 } 1159 1160 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1161 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1162 1163 #undef __FUNC__ 1164 #define __FUNC__ "MatGetRow_MPIAIJ" 1165 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1166 { 1167 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1168 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1169 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1170 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1171 int *cmap, *idx_p; 1172 1173 PetscFunctionBegin; 1174 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1175 mat->getrowactive = PETSC_TRUE; 1176 1177 if (!mat->rowvalues && (idx || v)) { 1178 /* 1179 allocate enough space to hold information from the longest row. 
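        The buffer is allocated only once and reused by all later
        MatGetRow() calls on this matrix; it is released in
        MatAssemblyEnd_MPIAIJ() and MatDestroy_MPIAIJ().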
1180 */ 1181 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1182 int max = 1,m = mat->m,tmp; 1183 for ( i=0; i<m; i++ ) { 1184 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1185 if (max < tmp) { max = tmp; } 1186 } 1187 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1188 CHKPTRQ(mat->rowvalues); 1189 mat->rowindices = (int *) (mat->rowvalues + max); 1190 } 1191 1192 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1193 lrow = row - rstart; 1194 1195 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1196 if (!v) {pvA = 0; pvB = 0;} 1197 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1198 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1199 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1200 nztot = nzA + nzB; 1201 1202 cmap = mat->garray; 1203 if (v || idx) { 1204 if (nztot) { 1205 /* Sort by increasing column numbers, assuming A and B already sorted */ 1206 int imark = -1; 1207 if (v) { 1208 *v = v_p = mat->rowvalues; 1209 for ( i=0; i<nzB; i++ ) { 1210 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1211 else break; 1212 } 1213 imark = i; 1214 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1215 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1216 } 1217 if (idx) { 1218 *idx = idx_p = mat->rowindices; 1219 if (imark > -1) { 1220 for ( i=0; i<imark; i++ ) { 1221 idx_p[i] = cmap[cworkB[i]]; 1222 } 1223 } else { 1224 for ( i=0; i<nzB; i++ ) { 1225 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1226 else break; 1227 } 1228 imark = i; 1229 } 1230 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1231 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1232 } 1233 } 1234 else { 1235 if (idx) *idx = 0; 1236 if (v) *v = 0; 1237 } 1238 } 1239 *nz = nztot; 1240 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1241 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNC__ 1246 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1247 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1248 { 1249 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1250 1251 PetscFunctionBegin; 1252 if (aij->getrowactive == PETSC_FALSE) { 1253 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1254 } 1255 aij->getrowactive = PETSC_FALSE; 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNC__ 1260 #define __FUNC__ "MatNorm_MPIAIJ" 1261 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1264 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1265 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1266 double sum = 0.0; 1267 Scalar *v; 1268 1269 PetscFunctionBegin; 1270 if (aij->size == 1) { 1271 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1272 } else { 1273 if (type == NORM_FROBENIUS) { 1274 v = amat->a; 1275 for (i=0; i<amat->nz; i++ ) { 1276 #if defined(USE_PETSC_COMPLEX) 1277 sum += real(conj(*v)*(*v)); v++; 1278 #else 1279 sum += (*v)*(*v); v++; 1280 #endif 1281 } 1282 v = bmat->a; 1283 for (i=0; i<bmat->nz; i++ ) { 1284 #if defined(USE_PETSC_COMPLEX) 1285 sum += real(conj(*v)*(*v)); v++; 1286 #else 1287 sum += (*v)*(*v); v++; 1288 #endif 1289 } 1290 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1291 *norm = sqrt(*norm); 1292 } else if (type == NORM_1) { /* max 
column norm */ 1293 double *tmp, *tmp2; 1294 int *jj, *garray = aij->garray; 1295 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1296 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1297 PetscMemzero(tmp,aij->N*sizeof(double)); 1298 *norm = 0.0; 1299 v = amat->a; jj = amat->j; 1300 for ( j=0; j<amat->nz; j++ ) { 1301 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1302 } 1303 v = bmat->a; jj = bmat->j; 1304 for ( j=0; j<bmat->nz; j++ ) { 1305 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1306 } 1307 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1308 for ( j=0; j<aij->N; j++ ) { 1309 if (tmp2[j] > *norm) *norm = tmp2[j]; 1310 } 1311 PetscFree(tmp); PetscFree(tmp2); 1312 } else if (type == NORM_INFINITY) { /* max row norm */ 1313 double ntemp = 0.0; 1314 for ( j=0; j<amat->m; j++ ) { 1315 v = amat->a + amat->i[j] + shift; 1316 sum = 0.0; 1317 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1318 sum += PetscAbsScalar(*v); v++; 1319 } 1320 v = bmat->a + bmat->i[j] + shift; 1321 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1322 sum += PetscAbsScalar(*v); v++; 1323 } 1324 if (sum > ntemp) ntemp = sum; 1325 } 1326 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1327 } else { 1328 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1329 } 1330 } 1331 PetscFunctionReturn(0); 1332 } 1333 1334 #undef __FUNC__ 1335 #define __FUNC__ "MatTranspose_MPIAIJ" 1336 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1337 { 1338 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1339 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1340 int ierr,shift = Aloc->indexshift; 1341 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1342 Mat B; 1343 Scalar *array; 1344 1345 PetscFunctionBegin; 1346 if (matout == PETSC_NULL && M != N) { 1347 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1348 } 1349 1350 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1351 1352 /* copy over the A part */ 1353 Aloc = (Mat_SeqAIJ*) a->A->data; 1354 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1355 row = a->rstart; 1356 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1357 for ( i=0; i<m; i++ ) { 1358 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1359 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1360 } 1361 aj = Aloc->j; 1362 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1363 1364 /* copy over the B part */ 1365 Aloc = (Mat_SeqAIJ*) a->B->data; 1366 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1367 row = a->rstart; 1368 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1369 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1370 for ( i=0; i<m; i++ ) { 1371 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1372 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1373 } 1374 PetscFree(ct); 1375 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1376 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1377 if (matout != PETSC_NULL) { 1378 *matout = B; 1379 } else { 1380 PetscOps *Abops; 1381 struct _MatOps *Aops; 1382 1383 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1384 PetscFree(a->rowners); 1385 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1386 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1387 if (a->colmap) PetscFree(a->colmap); 1388 if (a->garray) PetscFree(a->garray); 1389 if (a->lvec) VecDestroy(a->lvec); 1390 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1391 PetscFree(a); 1392 1393 /* 1394 This is horrible, horrible code. We need to keep the 1395 A pointers for the bops and ops but copy everything 1396 else from C. 1397 */ 1398 Abops = A->bops; 1399 Aops = A->ops; 1400 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1401 A->bops = Abops; 1402 A->ops = Aops; 1403 PetscHeaderDestroy(B); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 #undef __FUNC__ 1409 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1410 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1411 { 1412 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1413 Mat a = aij->A, b = aij->B; 1414 int ierr,s1,s2,s3; 1415 1416 PetscFunctionBegin; 1417 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1418 if (rr) { 1419 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1420 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1421 /* Overlap communication with computation. */ 1422 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1423 } 1424 if (ll) { 1425 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1426 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1427 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1428 } 1429 /* scale the diagonal block */ 1430 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1431 1432 if (rr) { 1433 /* Do a scatter end and then right scale the off-diagonal block */ 1434 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1435 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1436 } 1437 1438 PetscFunctionReturn(0); 1439 } 1440 1441 1442 extern int MatPrintHelp_SeqAIJ(Mat); 1443 #undef __FUNC__ 1444 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1445 int MatPrintHelp_MPIAIJ(Mat A) 1446 { 1447 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1448 int ierr; 1449 1450 PetscFunctionBegin; 1451 if (!a->rank) { 1452 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } 1456 1457 #undef __FUNC__ 1458 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1459 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1460 { 1461 PetscFunctionBegin; 1462 *bs = 1; 1463 PetscFunctionReturn(0); 1464 } 1465 #undef __FUNC__ 1466 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1467 int MatSetUnfactored_MPIAIJ(Mat A) 1468 { 1469 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1470 int ierr; 1471 1472 PetscFunctionBegin; 1473 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNC__ 1478 #define __FUNC__ "MatEqual_MPIAIJ" 1479 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1480 { 1481 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1482 Mat a, b, c, d; 1483 PetscTruth flg; 1484 int ierr; 1485 1486 PetscFunctionBegin; 1487 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1488 a = matA->A; b = matA->B; 1489 c = matB->A; d = matB->B; 1490 1491 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1492 if (flg == PETSC_TRUE) { 1493 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1494 } 1495 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 extern int 
MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.  For square matrices
           n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.
1574 1575 The user MUST specify either the local or global matrix dimensions 1576 (possibly both). 1577 1578 By default, this format uses inodes (identical nodes) when possible. 1579 We search for consecutive rows with the same nonzero structure, thereby 1580 reusing matrix information to achieve increased efficiency. 1581 1582 Options Database Keys: 1583 + -mat_aij_no_inode - Do not use inodes 1584 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1585 - -mat_aij_oneindex - Internally use indexing starting at 1 1586 rather than 0. Note that when calling MatSetValues(), 1587 the user still MUST index entries starting at 0! 1588 1589 Storage Information: 1590 For a square global matrix we define each processor's diagonal portion 1591 to be its local rows and the corresponding columns (a square submatrix); 1592 each processor's off-diagonal portion encompasses the remainder of the 1593 local matrix (a rectangular submatrix). 1594 1595 The user can specify preallocated storage for the diagonal part of 1596 the local submatrix with either d_nz or d_nnz (not both). Set 1597 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1598 memory allocation. Likewise, specify preallocated storage for the 1599 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1600 1601 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1602 the figure below we depict these three local rows and all columns (0-11). 1603 1604 .vb 1605 0 1 2 3 4 5 6 7 8 9 10 11 1606 ------------------- 1607 row 3 | o o o d d d o o o o o o 1608 row 4 | o o o d d d o o o o o o 1609 row 5 | o o o d d d o o o o o o 1610 ------------------- 1611 .ve 1612 1613 Thus, any entries in the d locations are stored in the d (diagonal) 1614 submatrix, and any entries in the o locations are stored in the 1615 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1616 stored simply in the MATSEQAIJ format for compressed row storage. 1617 1618 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1619 and o_nz should indicate the number of nonzeros per row in the o matrix. 1620 In general, for PDE problems in which most nonzeros are near the diagonal, 1621 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1622 or you will get TERRIBLE performance; see the users' manual chapter on 1623 matrices. 
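
   Example (a minimal sketch, not taken from the original source; comm, m, n,
   M, N, row, cols, and vals are placeholders supplied by the caller, and the
   preallocation counts 3 and 2 are arbitrary):
.vb
      Mat    A;
      int    ierr, row, cols[3];
      Scalar vals[3];
      ierr = MatCreateMPIAIJ(comm,m,n,M,N,3,PETSC_NULL,2,PETSC_NULL,&A);
      /* typically each process sets the rows it owns; off-process entries
         are stashed and communicated during assembly */
      ierr = MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve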

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}
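
/*
    Illustrative note (not part of the original source): the ownership tables
  built in MatCreateMPIAIJ() above work as follows.  MPI_Allgather() places
  the number of local rows of processor i into rowners[i+1], and the running
  sum then turns these counts into offsets.  For example, with three
  processors owning 4, 3 and 3 rows,

      after MPI_Allgather():  rowners = {0, 4, 3, 3}
      after the running sum:  rowners = {0, 4, 7, 10}

  so processor 1 owns global rows rstart = 4 up to (but not including)
  rend = 7.  The column table cowners is filled the same way from the
  local column sizes.
*/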

#undef __FUNC__  
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}
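
/*
    Illustrative note (not part of the original source): this routine
  presumably sits behind matrix duplication via MatConvert() with the
  MATSAME type, the same call used above to copy the diagonal (A) and
  off-diagonal (B) sequential blocks.  A usage sketch:

      ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);
*/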

#include "sys.h"

#undef __FUNC__  
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
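
  /*
      Illustrative note (not part of the original source): the reads in this
      routine imply the on-disk layout the loader expects, namely a 4-entry
      integer header (MAT_COOKIE, M, N, and a format field that must be
      non-negative here), followed by the M row lengths, then all column
      indices, then all numerical values, each as one contiguous block.
  */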
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
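
/*
    Illustrative usage sketch (not part of the original source): given a
  binary viewer already opened for reading, a parallel AIJ matrix can be
  loaded with

      Mat A;
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);

  which ends up in MatLoad_MPIAIJ() above and returns A fully assembled.
*/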

#undef __FUNC__  
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each processor (in local), and then the final result obtained
  by concatenating those local matrices.  Writing it directly would be much
  like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
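
    /*
        Illustrative note (not part of the original source): suppose this
        processor's "diagonal" column range is [rstart,rend) = [3,7) and a
        local row has column indices {0, 4, 5, 11}.  The loop above then
        counts dlen = 2 (columns 4 and 5) and olen = 2 (columns 0 and 11),
        and these become the per-row preallocation passed to
        MatCreateMPIAIJ() just below.
    */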
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save the submatrix used on this processor for the next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
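
/*
    Illustrative usage sketch (not part of the original source): at the user
  level this routine is the MPIAIJ implementation behind MatGetSubMatrix(),
  presumably called with the same argument list.  For example, to extract the
  rows and columns given by index sets isrow and iscol, letting PETSc pick
  the local column layout:

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);

  A later call with MAT_REUSE_MATRIX reuses the cached "SubMatrix" object
  composed on sub above.
*/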