
#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.256 1998/07/14 21:10:01 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non-scalable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}
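/*
   Example (sketch): once the colmap has been built, a global column number
   gcol is translated to a local column number in the off-diagonal block B by

       lcol = aij->colmap[gcol] - 1;

   A result of lcol < 0 means gcol is not (yet) present in B; the +1/-1
   offset is what lets a zeroed colmap entry denote "column absent".
*/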
extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for ( _i=low; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
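/*
   Example (sketch): any process may set any entry; rows it does not own
   (im[i] outside [rstart,rend)) are cached in the stash above and are
   communicated later by MatAssemblyBegin()/MatAssemblyEnd():

       int    row = 0, col = 0;       (illustrative indices)
       Scalar one = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES); CHKERRQ(ierr);
*/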
#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index,global column index,value); the indices are stored
     as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);
  PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status *send_status,recv_status;
  int        imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int        row,col,other_disassembled;
  Scalar     *values,val;
  InsertMode addv = mat->insertmode;

  PetscFunctionBegin;

  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (aij->recv_waits) PetscFree(aij->recv_waits);
  if (aij->rvalues) PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (aij->send_waits) PetscFree(aij->send_waits);
  if (aij->svalues) PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
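/*
   Example (sketch): the standard assembly sequence after all values have
   been set; the Begin/End split lets the stash communication started above
   overlap any work done between the two calls:

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);   CHKERRQ(ierr);
*/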
#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* set the diagonal entries only if they were not already handled above */
  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
    }
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
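/*
   Example (sketch): zero rows 0 and 5 of a parallel matrix A, placing
   1.0 on the zeroed diagonal entries (pass a null Scalar pointer to
   leave the diagonal zero):

       IS     is;
       int    rows[2] = {0,5};
       Scalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
       ierr = ISDestroy(is); CHKERRQ(ierr);
*/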
#undef __FUNC__  
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
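/*
   Structure of the products above (a sketch): for y = A*x,

       y = A_d * x_local + B_o * lvec,

   where A_d is the local "diagonal" block, B_o the off-diagonal block and
   lvec the gathered ghost values of x; the scatter Mvctx is started before
   the diagonal-block product so communication overlaps computation.  The
   transpose products run the same scatter in reverse with ADD_VALUES so
   the B_o^T contributions are accumulated onto their owning processes.
*/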
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec) VecDestroy(aij->lvec);
  if (aij->Mvctx) VecScatterDestroy(aij->Mvctx);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

    2) a) use only local smoothing, updating outer values only once
       b) local smoothing, updating outer values each inner iteration
    3) color, updating outer values between colors
*/
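/*
   Sketch of the local SOR update computed below, assuming the macro
   SPARSEDENSEMDOT() from src/inline/spops.h subtracts the sparse row dot
   product from sum (which the update requires): with d = fshift + a_ii,

       x_i <- (1-omega)*x_i + omega*(b_i - sum over j != i of a_ij*x_j)/d

   sum accumulates b_i minus the full row dot product (diagonal included),
   so the term a_ii*x_i is added back before dividing by d.
*/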
#undef __FUNC__  
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->N;

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}
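/*
   Example (sketch): loop over the locally owned rows of a parallel matrix.

       int rstart,rend,row;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         ... set or fetch entries of row ...
       }
*/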
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__  
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
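/*
   Example (sketch): examine one locally owned row and then restore it;
   the column indices returned are global and sorted.

       int    nz,*cols;
       Scalar *vals;
       ierr = MatGetRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
       ... use nz, cols[], vals[] ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals); CHKERRQ(ierr);
*/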
#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
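/*
   Norms computed above (sketch): with global entries a_ij,

       NORM_FROBENIUS:  sqrt( sum over i,j of |a_ij|^2 )
       NORM_1:          max over j of sum over i of |a_ij|   (max column sum)
       NORM_INFINITY:   max over i of sum over j of |a_ij|   (max row sum)

   Each requires one reduction; the 1-norm also needs two length-N work
   arrays per process, making it the most expensive of the three here.
*/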
#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
        This is horrible, horrible code. We need to keep the
      A pointers for the bops and ops but copy everything
      else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatConvertSameType_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};


#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
       This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  | o o o d d d o o o o o o
   row 4  | o o o d d d o o o o o o
   row 5  | o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
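
   Example:
   The following sketch (the nonzero counts are illustrative only) preallocates
   storage for the three local rows of the 12-column matrix depicted above,
   assuming 3 nonzeros per row in the d part and, say, 2 per row in the o part.

.vb
      Mat A;
      int i,ierr,d_nnz[3],o_nnz[3];
      for ( i=0; i<3; i++ ) { d_nnz[i] = 3; o_nnz[i] = 2; }
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DETERMINE,12,
                             0,d_nnz,0,o_nnz,&A); CHKERRQ(ierr);
.ve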

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    /* uniprocessor case: the matrix is simply a sequential AIJ matrix */
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not
     contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap); CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap); CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;
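  /* lvec (the local work vector) and Mvctx (the scatter context) cannot be
     built until assembly, when the set of off-processor columns actually
     referenced is known; here they are merely zeroed */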

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j, rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
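    /* header = [cookie, M, N, nz]; judging from the checks below, a negative
       nz entry (header[3]) marks a special on-disk format */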
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm); CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm); CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm); CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm); CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status); CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz); CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat); CHKERRQ(ierr);
  A = *newmat;
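  /* the file stores each row's column indices in ascending order, so
     MatSetValues() may skip its insertion search */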
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm); CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status); CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz); CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES); CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
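
/*
   Usage sketch for the loader above, assuming the matrix was written with
   MatView() on a binary viewer.  The viewer-creation call is an assumption
   about this generation's binary viewer interface; consult the viewer
   documentation for the exact name and arguments.

      Viewer viewer;
      Mat    A;
      ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/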

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  locally, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse); CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local); CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local); CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n); CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm); CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) ); CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M); CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl); CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M); CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ()
      below directly, rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse); CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
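
/*
   Usage sketch for the routine above (names and sizes illustrative): extract
   the same parallel submatrix twice, reusing the cached local pieces on the
   second call.  nloc and first describe this processor's piece of the index
   sets and are assumed to be set by the caller.

      IS  isrow,iscol;
      Mat sub;
      ierr = ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&isrow); CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&iscol); CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
      ... mat's values change, its nonzero structure stays the same ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub); CHKERRQ(ierr);
*/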