#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.252 1998/06/30 21:10:39 balay Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for ( _i=0; _i<nrow; _i++ ) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
    Scalar *new_a; \
 \
    if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
    /* malloc new storage space */ \
    len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
    new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
    new_j   = (int *) (new_a + new_nz); \
    new_i   = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
    for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
    len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
    PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
    PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                       len*sizeof(Scalar)); \
    /* free up old matrix storage */ \
 \
    PetscFree(a->a); \
    if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = 1; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
    a->maxnz += CHUNKSIZE; \
    a->reallocs++; \
  } \
  N = nrow++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for ( ii=N; ii>=_i; ii-- ) { \
    rp[ii+1] = rp[ii]; \
    ap[ii+1] = ap[ii]; \
  } \
  rp[_i] = col1; \
  ap[_i] = value; \
a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 
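  /*
     Overview of the loop below (descriptive comment): each entry (im[i],in[j]) is
     routed one of three ways:
       - row and column both locally owned   -> inserted into the "diagonal" block aij->A
       - row locally owned, column elsewhere -> inserted into the "off-diagonal" block aij->B
       - row owned by another process        -> cached in aij->stash and communicated
                                                during matrix assembly

     A minimal caller-side sketch (illustrative only; the indices and value are
     made up, and any process may set any global entry provided the matrix is
     assembled afterwards):

         int    row = 3, col = 7;
         Scalar val = 5.0;
         ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
         ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */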
for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } else { 282 if (!aij->colmap) { 283 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 col = aij->colmap[idxn[j]] - 1; 286 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 287 else { 288 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 289 } 290 } 291 } 292 } else { 293 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 294 } 295 } 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNC__ 300 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 301 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 304 MPI_Comm comm = mat->comm; 305 int size = aij->size, *owners = 
aij->rowners; 306 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 307 MPI_Request *send_waits,*recv_waits; 308 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 309 InsertMode addv; 310 Scalar *rvalues,*svalues; 311 312 PetscFunctionBegin; 313 if (aij->donotstash) { 314 aij->svalues = 0; aij->rvalues = 0; 315 aij->nsends = 0; aij->nrecvs = 0; 316 aij->send_waits = 0; aij->recv_waits = 0; 317 aij->rmax = 0; 318 PetscFunctionReturn(0); 319 } 320 321 /* make sure all processors are either in INSERTMODE or ADDMODE */ 322 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 323 if (addv == (ADD_VALUES|INSERT_VALUES)) { 324 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 325 } 326 mat->insertmode = addv; /* in case this processor had no cache */ 327 328 /* first count number of contributors to each processor */ 329 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 330 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 331 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 332 for ( i=0; i<aij->stash.n; i++ ) { 333 idx = aij->stash.idx[i]; 334 for ( j=0; j<size; j++ ) { 335 if (idx >= owners[j] && idx < owners[j+1]) { 336 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 337 } 338 } 339 } 340 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 341 342 /* inform other processors of number of messages and max length*/ 343 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 344 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 345 nreceives = work[rank]; 346 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 347 nmax = work[rank]; 348 PetscFree(work); 349 350 /* post receives: 351 1) each message will consist of ordered pairs 352 (global index,value) we store the global index as a double 353 to simplify the message passing. 354 2) since we don't know how long each individual message is we 355 allocate the largest needed buffer for each receive. Potentially 356 this is a lot of wasted space. 357 358 359 This could be done better. 
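         Note: each stashed entry is actually packed and sent as a triple
         (global row, global column, value), with the indices stored as
         Scalars; that is the origin of the factor of 3 in the buffer sizes
         and message lengths below.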
360 */ 361 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 362 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 363 for ( i=0; i<nreceives; i++ ) { 364 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 365 comm,recv_waits+i);CHKERRQ(ierr); 366 } 367 368 /* do sends: 369 1) starts[i] gives the starting index in svalues for stuff going to 370 the ith processor 371 */ 372 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 373 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 374 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 375 starts[0] = 0; 376 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 377 for ( i=0; i<aij->stash.n; i++ ) { 378 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 379 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 380 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 381 } 382 PetscFree(owner); 383 starts[0] = 0; 384 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 385 count = 0; 386 for ( i=0; i<size; i++ ) { 387 if (procs[i]) { 388 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 389 } 390 } 391 PetscFree(starts); 392 PetscFree(nprocs); 393 394 /* Free cache space */ 395 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 396 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 397 398 aij->svalues = svalues; aij->rvalues = rvalues; 399 aij->nsends = nsends; aij->nrecvs = nreceives; 400 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 401 aij->rmax = nmax; 402 403 PetscFunctionReturn(0); 404 } 405 extern int MatSetUpMultiply_MPIAIJ(Mat); 406 407 #undef __FUNC__ 408 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 409 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 412 MPI_Status *send_status,recv_status; 413 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 414 int row,col,other_disassembled; 415 Scalar *values,val; 416 InsertMode addv = mat->insertmode; 417 418 PetscFunctionBegin; 419 420 /* wait on receives */ 421 while (count) { 422 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 423 /* unpack receives into our local space */ 424 values = aij->rvalues + 3*imdex*aij->rmax; 425 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 426 n = n/3; 427 for ( i=0; i<n; i++ ) { 428 row = (int) PetscReal(values[3*i]) - aij->rstart; 429 col = (int) PetscReal(values[3*i+1]); 430 val = values[3*i+2]; 431 if (col >= aij->cstart && col < aij->cend) { 432 col -= aij->cstart; 433 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 434 } else { 435 if (mat->was_assembled || mat->assembled) { 436 if (!aij->colmap) { 437 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 438 } 439 col = aij->colmap[col] - 1; 440 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 441 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 442 col = (int) PetscReal(values[3*i+1]); 443 } 444 } 445 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 446 } 447 } 448 count--; 449 } 450 if (aij->recv_waits) PetscFree(aij->recv_waits); 451 if (aij->rvalues) PetscFree(aij->rvalues); 452 453 /* wait on sends */ 454 if (aij->nsends) { 455 send_status = (MPI_Status *) 
PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 456 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 457 PetscFree(send_status); 458 } 459 if (aij->send_waits) PetscFree(aij->send_waits); 460 if (aij->svalues) PetscFree(aij->svalues); 461 462 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 463 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 464 465 /* determine if any processor has disassembled, if so we must 466 also disassemble ourselfs, in order that we may reassemble. */ 467 /* 468 if nonzero structure of submatrix B cannot change then we know that 469 no processor disassembled thus we can skip this stuff 470 */ 471 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 472 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 473 if (mat->was_assembled && !other_disassembled) { 474 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 475 } 476 } 477 478 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 479 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 480 } 481 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 482 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 483 484 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ "MatZeroEntries_MPIAIJ" 490 int MatZeroEntries_MPIAIJ(Mat A) 491 { 492 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 493 int ierr; 494 495 PetscFunctionBegin; 496 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 497 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNC__ 502 #define __FUNC__ "MatZeroRows_MPIAIJ" 503 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 504 { 505 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 506 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 507 int *procs,*nprocs,j,found,idx,nsends,*work,row; 508 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 509 int *rvalues,tag = A->tag,count,base,slen,n,*source; 510 int *lens,imdex,*lrows,*values,rstart=l->rstart; 511 MPI_Comm comm = A->comm; 512 MPI_Request *send_waits,*recv_waits; 513 MPI_Status recv_status,*send_status; 514 IS istmp; 515 PetscTruth localdiag; 516 517 PetscFunctionBegin; 518 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 519 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 520 521 /* first count number of contributors to each processor */ 522 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 523 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 524 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 525 for ( i=0; i<N; i++ ) { 526 idx = rows[i]; 527 found = 0; 528 for ( j=0; j<size; j++ ) { 529 if (idx >= owners[j] && idx < owners[j+1]) { 530 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 531 } 532 } 533 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 534 } 535 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 536 537 /* inform other processors of number of messages and max length*/ 538 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 539 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 540 nrecvs = work[rank]; 541 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 542 nmax = work[rank]; 543 PetscFree(work); 544 545 /* post receives: */ 546 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 547 recv_waits = (MPI_Request *) 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 548 for ( i=0; i<nrecvs; i++ ) { 549 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 550 } 551 552 /* do sends: 553 1) starts[i] gives the starting index in svalues for stuff going to 554 the ith processor 555 */ 556 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 557 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 558 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 559 starts[0] = 0; 560 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 561 for ( i=0; i<N; i++ ) { 562 svalues[starts[owner[i]]++] = rows[i]; 563 } 564 ISRestoreIndices(is,&rows); 565 566 starts[0] = 0; 567 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 568 count = 0; 569 for ( i=0; i<size; i++ ) { 570 if (procs[i]) { 571 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 572 } 573 } 574 PetscFree(starts); 575 576 base = owners[rank]; 577 578 /* wait on receives */ 579 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 580 source = lens + nrecvs; 581 count = nrecvs; slen = 0; 582 while (count) { 583 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 584 /* unpack receives into our local space */ 585 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 586 source[imdex] = recv_status.MPI_SOURCE; 587 lens[imdex] = n; 588 slen += n; 589 count--; 590 } 591 PetscFree(recv_waits); 592 593 /* move the data into the send scatter */ 594 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 595 count = 0; 596 for ( i=0; i<nrecvs; i++ ) { 597 values = rvalues + i*nmax; 598 for ( j=0; j<lens[i]; j++ ) { 599 lrows[count++] = values[j] - base; 600 } 601 } 602 PetscFree(rvalues); PetscFree(lens); 603 PetscFree(owner); PetscFree(nprocs); 604 605 /* actually zap the local rows */ 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 607 PLogObjectParent(A,istmp); 608 609 /* 610 Zero the required rows. If the "diagonal block" of the matrix 611 is square and the user wishes to set the diagonal we use seperate 612 code so that MatSetValues() is not called for each diagonal allocating 613 new memory, thus calling lots of mallocs and slowing things down. 
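      A caller-side sketch (illustrative only; the row numbers are made up)
      of zeroing a few global rows and placing 1.0 on their diagonals:

         IS     is;
         int    rows[2] = {3,7};
         Scalar one = 1.0;
         ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
         ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
         ierr = ISDestroy(is);CHKERRQ(ierr);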
614 615 Contributed by: Mathew Knepley 616 */ 617 localdiag = PETSC_FALSE; 618 if (diag && (l->A->M == l->A->N)) { 619 localdiag = PETSC_TRUE; 620 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 621 } else { 622 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 623 } 624 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 625 ierr = ISDestroy(istmp); CHKERRQ(ierr); 626 627 if (diag && (localdiag == PETSC_FALSE)) { 628 for ( i = 0; i < slen; i++ ) { 629 row = lrows[i] + rstart; 630 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 631 } 632 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 633 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 634 } 635 636 if (diag) { 637 for ( i = 0; i < slen; i++ ) { 638 row = lrows[i] + rstart; 639 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 640 } 641 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 642 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 643 } 644 PetscFree(lrows); 645 646 /* wait on sends */ 647 if (nsends) { 648 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 649 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 650 PetscFree(send_status); 651 } 652 PetscFree(send_waits); PetscFree(svalues); 653 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNC__ 658 #define __FUNC__ "MatMult_MPIAIJ" 659 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 660 { 661 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 662 int ierr,nt; 663 664 PetscFunctionBegin; 665 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 666 if (nt != a->n) { 667 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 668 } 669 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 670 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 671 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 672 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNC__ 677 #define __FUNC__ "MatMultAdd_MPIAIJ" 678 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 679 { 680 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 681 int ierr; 682 683 PetscFunctionBegin; 684 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 685 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 686 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 687 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNC__ 692 #define __FUNC__ "MatMultTrans_MPIAIJ" 693 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696 int ierr; 697 698 PetscFunctionBegin; 699 /* do nondiagonal part */ 700 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 701 /* send it on its way */ 702 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 703 /* do local part */ 704 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 705 /* receive remote parts: note this assumes the values are not actually */ 706 /* inserted in yy until the next line, which is true for my implementation*/ 707 /* but is not perhaps always true. 
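     In effect yy ends up holding A->A^T * xx plus the ghost contributions
     A->B^T * xx scattered back (with ADD_VALUES) to the owning processes.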
*/ 708 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 714 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 717 int ierr; 718 719 PetscFunctionBegin; 720 /* do nondiagonal part */ 721 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 722 /* send it on its way */ 723 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 724 /* do local part */ 725 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 726 /* receive remote parts: note this assumes the values are not actually */ 727 /* inserted in yy until the next line, which is true for my implementation*/ 728 /* but is not perhaps always true. */ 729 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 /* 734 This only works correctly for square matrices where the subblock A->A is the 735 diagonal block 736 */ 737 #undef __FUNC__ 738 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 739 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 740 { 741 int ierr; 742 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 743 744 PetscFunctionBegin; 745 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 746 if (a->rstart != a->cstart || a->rend != a->cend) { 747 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 748 } 749 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNC__ 754 #define __FUNC__ "MatScale_MPIAIJ" 755 int MatScale_MPIAIJ(Scalar *aa,Mat A) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 758 int ierr; 759 760 PetscFunctionBegin; 761 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 762 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNC__ 767 #define __FUNC__ "MatDestroy_MPIAIJ" 768 int MatDestroy_MPIAIJ(Mat mat) 769 { 770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 771 int ierr; 772 773 PetscFunctionBegin; 774 if (--mat->refct > 0) PetscFunctionReturn(0); 775 776 if (mat->mapping) { 777 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 778 } 779 if (mat->bmapping) { 780 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 781 } 782 #if defined(USE_PETSC_LOG) 783 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 784 #endif 785 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 786 PetscFree(aij->rowners); 787 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 788 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 789 if (aij->colmap) PetscFree(aij->colmap); 790 if (aij->garray) PetscFree(aij->garray); 791 if (aij->lvec) VecDestroy(aij->lvec); 792 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 793 if (aij->rowvalues) PetscFree(aij->rowvalues); 794 PetscFree(aij); 795 PLogObjectDestroy(mat); 796 PetscHeaderDestroy(mat); 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNC__ 801 #define __FUNC__ "MatView_MPIAIJ_Binary" 802 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 803 { 804 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 805 int ierr; 806 807 PetscFunctionBegin; 808 if (aij->size == 1) { 809 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 810 } 811 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNC__ 816 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 817 extern int 
MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 818 { 819 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 820 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 821 int ierr, format,shift = C->indexshift,rank; 822 FILE *fd; 823 ViewerType vtype; 824 825 PetscFunctionBegin; 826 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 827 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 828 ierr = ViewerGetFormat(viewer,&format); 829 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 830 MatInfo info; 831 int flg; 832 MPI_Comm_rank(mat->comm,&rank); 833 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 834 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 835 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 836 PetscSequentialPhaseBegin(mat->comm,1); 837 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 838 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 839 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 840 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 841 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 842 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 843 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 844 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 845 fflush(fd); 846 PetscSequentialPhaseEnd(mat->comm,1); 847 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 850 PetscFunctionReturn(0); 851 } 852 } 853 854 if (vtype == DRAW_VIEWER) { 855 Draw draw; 856 PetscTruth isnull; 857 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 858 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 859 } 860 861 if (vtype == ASCII_FILE_VIEWER) { 862 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 863 PetscSequentialPhaseBegin(mat->comm,1); 864 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 865 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 866 aij->cend); 867 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 868 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 869 fflush(fd); 870 PetscSequentialPhaseEnd(mat->comm,1); 871 } else { 872 int size = aij->size; 873 rank = aij->rank; 874 if (size == 1) { 875 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 876 } else { 877 /* assemble the entire matrix onto first processor. 
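         The copy is made by creating an auxiliary MPIAIJ matrix that owns all
         M rows on process 0 (and none elsewhere) and funneling every locally
         stored value into it with MatSetValues(); this is only intended for
         viewing modest-sized matrices.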
*/ 878 Mat A; 879 Mat_SeqAIJ *Aloc; 880 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 881 Scalar *a; 882 883 if (!rank) { 884 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 885 } else { 886 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 887 } 888 PLogObjectParent(mat,A); 889 890 /* copy over the A part */ 891 Aloc = (Mat_SeqAIJ*) aij->A->data; 892 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 893 row = aij->rstart; 894 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 895 for ( i=0; i<m; i++ ) { 896 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 897 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 898 } 899 aj = Aloc->j; 900 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 901 902 /* copy over the B part */ 903 Aloc = (Mat_SeqAIJ*) aij->B->data; 904 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 905 row = aij->rstart; 906 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 907 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 908 for ( i=0; i<m; i++ ) { 909 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 910 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 911 } 912 PetscFree(ct); 913 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 915 /* 916 Everyone has to call to draw the matrix since the graphics waits are 917 synchronized across all processors that share the Draw object 918 */ 919 if (!rank || vtype == DRAW_VIEWER) { 920 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 921 } 922 ierr = MatDestroy(A); CHKERRQ(ierr); 923 } 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNC__ 929 #define __FUNC__ "MatView_MPIAIJ" 930 int MatView_MPIAIJ(Mat mat,Viewer viewer) 931 { 932 int ierr; 933 ViewerType vtype; 934 935 PetscFunctionBegin; 936 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 937 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 938 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 939 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 940 } else if (vtype == BINARY_FILE_VIEWER) { 941 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 942 } else { 943 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 944 } 945 PetscFunctionReturn(0); 946 } 947 948 /* 949 This has to provide several versions. 950 951 2) a) use only local smoothing updating outer values only once. 952 b) local smoothing updating outer values each inner iteration 953 3) color updating out values betwen colors. 
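   The local sweeps below apply, in effect, the standard SOR update

      x_i <- (1-omega) x_i + omega * (b_i - sum over j != i of a_ij x_j) / (a_ii + fshift)

   where columns owned by other processes are taken from the ghost values in
   mat->lvec gathered by the initial VecScatter.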
954 */ 955 #undef __FUNC__ 956 #define __FUNC__ "MatRelax_MPIAIJ" 957 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 958 double fshift,int its,Vec xx) 959 { 960 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 961 Mat AA = mat->A, BB = mat->B; 962 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 963 Scalar *b,*x,*xs,*ls,d,*v,sum; 964 int ierr,*idx, *diag; 965 int n = mat->n, m = mat->m, i,shift = A->indexshift; 966 967 PetscFunctionBegin; 968 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 969 xs = x + shift; /* shift by one for index start of 1 */ 970 ls = ls + shift; 971 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 972 diag = A->diag; 973 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 974 if (flag & SOR_ZERO_INITIAL_GUESS) { 975 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 979 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 980 while (its--) { 981 /* go down through the rows */ 982 for ( i=0; i<m; i++ ) { 983 n = A->i[i+1] - A->i[i]; 984 idx = A->j + A->i[i] + shift; 985 v = A->a + A->i[i] + shift; 986 sum = b[i]; 987 SPARSEDENSEMDOT(sum,xs,v,idx,n); 988 d = fshift + A->a[diag[i]+shift]; 989 n = B->i[i+1] - B->i[i]; 990 idx = B->j + B->i[i] + shift; 991 v = B->a + B->i[i] + shift; 992 SPARSEDENSEMDOT(sum,ls,v,idx,n); 993 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 994 } 995 /* come up through the rows */ 996 for ( i=m-1; i>-1; i-- ) { 997 n = A->i[i+1] - A->i[i]; 998 idx = A->j + A->i[i] + shift; 999 v = A->a + A->i[i] + shift; 1000 sum = b[i]; 1001 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1002 d = fshift + A->a[diag[i]+shift]; 1003 n = B->i[i+1] - B->i[i]; 1004 idx = B->j + B->i[i] + shift; 1005 v = B->a + B->i[i] + shift; 1006 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1007 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1008 } 1009 } 1010 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1011 if (flag & SOR_ZERO_INITIAL_GUESS) { 1012 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1016 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1017 while (its--) { 1018 for ( i=0; i<m; i++ ) { 1019 n = A->i[i+1] - A->i[i]; 1020 idx = A->j + A->i[i] + shift; 1021 v = A->a + A->i[i] + shift; 1022 sum = b[i]; 1023 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024 d = fshift + A->a[diag[i]+shift]; 1025 n = B->i[i+1] - B->i[i]; 1026 idx = B->j + B->i[i] + shift; 1027 v = B->a + B->i[i] + shift; 1028 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1029 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030 } 1031 } 1032 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1038 mat->Mvctx); CHKERRQ(ierr); 1039 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1040 mat->Mvctx); CHKERRQ(ierr); 1041 while (its--) { 1042 for ( i=m-1; i>-1; i-- ) { 1043 n = A->i[i+1] - A->i[i]; 1044 idx = A->j + A->i[i] + shift; 1045 v = A->a + A->i[i] + shift; 1046 sum = b[i]; 1047 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1048 d = fshift + A->a[diag[i]+shift]; 1049 n = B->i[i+1] - B->i[i]; 1050 idx = B->j + B->i[i] + shift; 1051 v = B->a + B->i[i] + shift; 1052 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1053 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1054 } 1055 } 1056 } else { 1057 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1058 } 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNC__ 1063 #define __FUNC__ "MatGetInfo_MPIAIJ" 1064 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1065 { 1066 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1067 Mat A = mat->A, B = mat->B; 1068 int ierr; 1069 double isend[5], irecv[5]; 1070 1071 PetscFunctionBegin; 1072 info->block_size = 1.0; 1073 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1074 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1075 isend[3] = info->memory; isend[4] = info->mallocs; 1076 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1077 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1078 isend[3] += info->memory; isend[4] += info->mallocs; 1079 if (flag == MAT_LOCAL) { 1080 info->nz_used = isend[0]; 1081 info->nz_allocated = isend[1]; 1082 info->nz_unneeded = isend[2]; 1083 info->memory = isend[3]; 1084 info->mallocs = isend[4]; 1085 } else if (flag == MAT_GLOBAL_MAX) { 1086 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1087 info->nz_used = irecv[0]; 1088 info->nz_allocated = irecv[1]; 1089 info->nz_unneeded = irecv[2]; 1090 info->memory = irecv[3]; 1091 info->mallocs = irecv[4]; 1092 } else if (flag == MAT_GLOBAL_SUM) { 1093 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1094 info->nz_used = irecv[0]; 1095 info->nz_allocated = irecv[1]; 1096 info->nz_unneeded = irecv[2]; 1097 info->memory = irecv[3]; 1098 info->mallocs = irecv[4]; 1099 } 1100 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1101 info->fill_ratio_needed = 0; 1102 info->factor_mallocs = 0; 1103 info->rows_global = (double)mat->M; 1104 info->columns_global = (double)mat->N; 1105 info->rows_local = (double)mat->m; 1106 info->columns_local = (double)mat->N; 1107 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNC__ 1112 #define __FUNC__ "MatSetOption_MPIAIJ" 1113 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1114 { 1115 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1116 1117 PetscFunctionBegin; 1118 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1119 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1120 op == MAT_COLUMNS_UNSORTED || 1121 op == MAT_COLUMNS_SORTED || 1122 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1123 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1124 MatSetOption(a->A,op); 1125 MatSetOption(a->B,op); 1126 } else if (op == MAT_ROW_ORIENTED) { 1127 a->roworiented = 1; 1128 
MatSetOption(a->A,op); 1129 MatSetOption(a->B,op); 1130 } else if (op == MAT_ROWS_SORTED || 1131 op == MAT_ROWS_UNSORTED || 1132 op == MAT_SYMMETRIC || 1133 op == MAT_STRUCTURALLY_SYMMETRIC || 1134 op == MAT_YES_NEW_DIAGONALS) 1135 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1136 else if (op == MAT_COLUMN_ORIENTED) { 1137 a->roworiented = 0; 1138 MatSetOption(a->A,op); 1139 MatSetOption(a->B,op); 1140 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1141 a->donotstash = 1; 1142 } else if (op == MAT_NO_NEW_DIAGONALS){ 1143 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1144 } else { 1145 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1146 } 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNC__ 1151 #define __FUNC__ "MatGetSize_MPIAIJ" 1152 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1153 { 1154 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1155 1156 PetscFunctionBegin; 1157 if (m) *m = mat->M; 1158 if (n) *n = mat->N; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNC__ 1163 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1164 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1165 { 1166 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1167 1168 PetscFunctionBegin; 1169 if (m) *m = mat->m; 1170 if (n) *n = mat->n; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNC__ 1175 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1176 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1177 { 1178 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1179 1180 PetscFunctionBegin; 1181 *m = mat->rstart; *n = mat->rend; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1186 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1187 1188 #undef __FUNC__ 1189 #define __FUNC__ "MatGetRow_MPIAIJ" 1190 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1191 { 1192 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1193 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1194 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1195 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1196 int *cmap, *idx_p; 1197 1198 PetscFunctionBegin; 1199 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1200 mat->getrowactive = PETSC_TRUE; 1201 1202 if (!mat->rowvalues && (idx || v)) { 1203 /* 1204 allocate enough space to hold information from the longest row. 
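        (The buffer holds the merged row: off-diagonal entries from B mapped to
        global column numbers through garray, interleaved in ascending column
        order with the diagonal-block entries offset by cstart.)

        Caller-side sketch (illustrative) for walking the locally owned rows:

           int    rstart,rend,row,nz,*cols;
           Scalar *vals;
           ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
           for ( row=rstart; row<rend; row++ ) {
             ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
             ... use nz, the global indices in cols, and vals ...
             ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
           }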
1205 */ 1206 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1207 int max = 1,m = mat->m,tmp; 1208 for ( i=0; i<m; i++ ) { 1209 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1210 if (max < tmp) { max = tmp; } 1211 } 1212 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1213 CHKPTRQ(mat->rowvalues); 1214 mat->rowindices = (int *) (mat->rowvalues + max); 1215 } 1216 1217 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1218 lrow = row - rstart; 1219 1220 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1221 if (!v) {pvA = 0; pvB = 0;} 1222 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1223 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1224 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1225 nztot = nzA + nzB; 1226 1227 cmap = mat->garray; 1228 if (v || idx) { 1229 if (nztot) { 1230 /* Sort by increasing column numbers, assuming A and B already sorted */ 1231 int imark = -1; 1232 if (v) { 1233 *v = v_p = mat->rowvalues; 1234 for ( i=0; i<nzB; i++ ) { 1235 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1236 else break; 1237 } 1238 imark = i; 1239 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1240 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1241 } 1242 if (idx) { 1243 *idx = idx_p = mat->rowindices; 1244 if (imark > -1) { 1245 for ( i=0; i<imark; i++ ) { 1246 idx_p[i] = cmap[cworkB[i]]; 1247 } 1248 } else { 1249 for ( i=0; i<nzB; i++ ) { 1250 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1251 else break; 1252 } 1253 imark = i; 1254 } 1255 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1256 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1257 } 1258 } 1259 else { 1260 if (idx) *idx = 0; 1261 if (v) *v = 0; 1262 } 1263 } 1264 *nz = nztot; 1265 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1266 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNC__ 1271 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1272 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1273 { 1274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1275 1276 PetscFunctionBegin; 1277 if (aij->getrowactive == PETSC_FALSE) { 1278 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1279 } 1280 aij->getrowactive = PETSC_FALSE; 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNC__ 1285 #define __FUNC__ "MatNorm_MPIAIJ" 1286 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1287 { 1288 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1289 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1290 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1291 double sum = 0.0; 1292 Scalar *v; 1293 1294 PetscFunctionBegin; 1295 if (aij->size == 1) { 1296 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1297 } else { 1298 if (type == NORM_FROBENIUS) { 1299 v = amat->a; 1300 for (i=0; i<amat->nz; i++ ) { 1301 #if defined(USE_PETSC_COMPLEX) 1302 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1303 #else 1304 sum += (*v)*(*v); v++; 1305 #endif 1306 } 1307 v = bmat->a; 1308 for (i=0; i<bmat->nz; i++ ) { 1309 #if defined(USE_PETSC_COMPLEX) 1310 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1311 #else 1312 sum += (*v)*(*v); v++; 1313 #endif 1314 } 1315 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1316 *norm = sqrt(*norm); 1317 } else if (type == 
NORM_1) { /* max column norm */ 1318 double *tmp, *tmp2; 1319 int *jj, *garray = aij->garray; 1320 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1321 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1322 PetscMemzero(tmp,aij->N*sizeof(double)); 1323 *norm = 0.0; 1324 v = amat->a; jj = amat->j; 1325 for ( j=0; j<amat->nz; j++ ) { 1326 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1327 } 1328 v = bmat->a; jj = bmat->j; 1329 for ( j=0; j<bmat->nz; j++ ) { 1330 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1331 } 1332 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1333 for ( j=0; j<aij->N; j++ ) { 1334 if (tmp2[j] > *norm) *norm = tmp2[j]; 1335 } 1336 PetscFree(tmp); PetscFree(tmp2); 1337 } else if (type == NORM_INFINITY) { /* max row norm */ 1338 double ntemp = 0.0; 1339 for ( j=0; j<amat->m; j++ ) { 1340 v = amat->a + amat->i[j] + shift; 1341 sum = 0.0; 1342 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1343 sum += PetscAbsScalar(*v); v++; 1344 } 1345 v = bmat->a + bmat->i[j] + shift; 1346 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1347 sum += PetscAbsScalar(*v); v++; 1348 } 1349 if (sum > ntemp) ntemp = sum; 1350 } 1351 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1352 } else { 1353 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1354 } 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNC__ 1360 #define __FUNC__ "MatTranspose_MPIAIJ" 1361 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1362 { 1363 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1364 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1365 int ierr,shift = Aloc->indexshift; 1366 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1367 Mat B; 1368 Scalar *array; 1369 1370 PetscFunctionBegin; 1371 if (matout == PETSC_NULL && M != N) { 1372 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1373 } 1374 1375 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1376 1377 /* copy over the A part */ 1378 Aloc = (Mat_SeqAIJ*) a->A->data; 1379 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1382 for ( i=0; i<m; i++ ) { 1383 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1385 } 1386 aj = Aloc->j; 1387 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1388 1389 /* copy over the B part */ 1390 Aloc = (Mat_SeqAIJ*) a->B->data; 1391 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1392 row = a->rstart; 1393 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1394 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1395 for ( i=0; i<m; i++ ) { 1396 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1397 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1398 } 1399 PetscFree(ct); 1400 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1402 if (matout != PETSC_NULL) { 1403 *matout = B; 1404 } else { 1405 PetscOps *Abops; 1406 struct _MatOps *Aops; 1407 1408 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1409 PetscFree(a->rowners); 1410 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1411 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1412 if (a->colmap) PetscFree(a->colmap); 1413 if (a->garray) PetscFree(a->garray); 1414 if (a->lvec) VecDestroy(a->lvec); 1415 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1416 PetscFree(a); 1417 1418 /* 1419 This is horrible, horrible code. We need to keep the 1420 A pointers for the bops and ops but copy everything 1421 else from C. 1422 */ 1423 Abops = A->bops; 1424 Aops = A->ops; 1425 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1426 A->bops = Abops; 1427 A->ops = Aops; 1428 PetscHeaderDestroy(B); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1435 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1436 { 1437 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1438 Mat a = aij->A, b = aij->B; 1439 int ierr,s1,s2,s3; 1440 1441 PetscFunctionBegin; 1442 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1443 if (rr) { 1444 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1445 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1446 /* Overlap communication with computation. */ 1447 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1448 } 1449 if (ll) { 1450 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1451 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1452 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1453 } 1454 /* scale the diagonal block */ 1455 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1456 1457 if (rr) { 1458 /* Do a scatter end and then right scale the off-diagonal block */ 1459 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1460 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1461 } 1462 1463 PetscFunctionReturn(0); 1464 } 1465 1466 1467 extern int MatPrintHelp_SeqAIJ(Mat); 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1470 int MatPrintHelp_MPIAIJ(Mat A) 1471 { 1472 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1473 int ierr; 1474 1475 PetscFunctionBegin; 1476 if (!a->rank) { 1477 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNC__ 1483 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1484 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1485 { 1486 PetscFunctionBegin; 1487 *bs = 1; 1488 PetscFunctionReturn(0); 1489 } 1490 #undef __FUNC__ 1491 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1492 int MatSetUnfactored_MPIAIJ(Mat A) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1495 int ierr; 1496 1497 PetscFunctionBegin; 1498 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNC__ 1503 #define __FUNC__ "MatEqual_MPIAIJ" 1504 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1505 { 1506 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1507 Mat a, b, c, d; 1508 PetscTruth flg; 1509 int ierr; 1510 1511 PetscFunctionBegin; 1512 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1513 a = matA->A; b = matA->B; 1514 c = matB->A; d = matB->B; 1515 1516 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1517 if (flg == PETSC_TRUE) { 1518 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1519 } 1520 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 1524 extern int 
MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1525 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1526 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1527 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1528 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1529 1530 /* -------------------------------------------------------------------*/ 1531 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1532 MatGetRow_MPIAIJ, 1533 MatRestoreRow_MPIAIJ, 1534 MatMult_MPIAIJ, 1535 MatMultAdd_MPIAIJ, 1536 MatMultTrans_MPIAIJ, 1537 MatMultTransAdd_MPIAIJ, 1538 0, 1539 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 MatRelax_MPIAIJ, 1545 MatTranspose_MPIAIJ, 1546 MatGetInfo_MPIAIJ, 1547 MatEqual_MPIAIJ, 1548 MatGetDiagonal_MPIAIJ, 1549 MatDiagonalScale_MPIAIJ, 1550 MatNorm_MPIAIJ, 1551 MatAssemblyBegin_MPIAIJ, 1552 MatAssemblyEnd_MPIAIJ, 1553 0, 1554 MatSetOption_MPIAIJ, 1555 MatZeroEntries_MPIAIJ, 1556 MatZeroRows_MPIAIJ, 1557 0, 1558 0, 1559 0, 1560 0, 1561 MatGetSize_MPIAIJ, 1562 MatGetLocalSize_MPIAIJ, 1563 MatGetOwnershipRange_MPIAIJ, 1564 0, 1565 0, 1566 0, 1567 0, 1568 MatConvertSameType_MPIAIJ, 1569 0, 1570 0, 1571 0, 1572 0, 1573 0, 1574 MatGetSubMatrices_MPIAIJ, 1575 MatIncreaseOverlap_MPIAIJ, 1576 MatGetValues_MPIAIJ, 1577 0, 1578 MatPrintHelp_MPIAIJ, 1579 MatScale_MPIAIJ, 1580 0, 1581 0, 1582 0, 1583 MatGetBlockSize_MPIAIJ, 1584 0, 1585 0, 1586 0, 1587 0, 1588 MatFDColoringCreate_MPIAIJ, 1589 0, 1590 MatSetUnfactored_MPIAIJ, 1591 0, 1592 0, 1593 MatGetSubMatrix_MPIAIJ, 1594 0, 1595 0, 1596 MatGetMaps_Petsc}; 1597 1598 1599 #undef __FUNC__ 1600 #define __FUNC__ "MatCreateMPIAIJ" 1601 /*@C 1602 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1603 (the default parallel PETSc format). For good matrix assembly performance 1604 the user should preallocate the matrix storage by setting the parameters 1605 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1606 performance can be increased by more than a factor of 50. 1607 1608 Collective on MPI_Comm 1609 1610 Input Parameters: 1611 + comm - MPI communicator 1612 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1613 This value should be the same as the local size used in creating the 1614 y vector for the matrix-vector product y = Ax. 1615 . n - This value should be the same as the local size used in creating the 1616 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1617 calculated if N is given) For square matrices n is almost always m. 1618 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1619 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1620 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1621 (same for all local rows) 1622 . d_nzz - array containing the number of nonzeros in the various rows of the 1623 diagonal portion of the local submatrix (possibly different for each row) 1624 or PETSC_NULL. You must leave room for the diagonal entry even if 1625 it is zero. 1626 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1627 submatrix (same for all local rows). 1628 - o_nzz - array containing the number of nonzeros in the various rows of the 1629 off-diagonal portion of the local submatrix (possibly different for 1630 each row) or PETSC_NULL. 1631 1632 Output Parameter: 1633 . 
A - the matrix 1634 1635 Notes: 1636 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1637 then it must be used on all processors that share the object for that argument. 1638 1639 The AIJ format (also called the Yale sparse matrix format or 1640 compressed row storage) is fully compatible with standard Fortran 77 1641 storage. That is, the stored row and column indices can begin at 1642 either one (as in Fortran) or zero. See the users manual for details. 1643 1644 The user MUST specify either the local or global matrix dimensions 1645 (possibly both). 1646 1647 By default, this format uses inodes (identical nodes) when possible. 1648 We search for consecutive rows with the same nonzero structure, thereby 1649 reusing matrix information to achieve increased efficiency. 1650 1651 Options Database Keys: 1652 + -mat_aij_no_inode - Do not use inodes 1653 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1654 - -mat_aij_oneindex - Internally use indexing starting at 1 1655 rather than 0. Note that when calling MatSetValues(), 1656 the user still MUST index entries starting at 0! 1657 1658 Storage Information: 1659 For a square global matrix we define each processor's diagonal portion 1660 to be its local rows and the corresponding columns (a square submatrix); 1661 each processor's off-diagonal portion encompasses the remainder of the 1662 local matrix (a rectangular submatrix). 1663 1664 The user can specify preallocated storage for the diagonal part of 1665 the local submatrix with either d_nz or d_nnz (not both). Set 1666 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1667 memory allocation. Likewise, specify preallocated storage for the 1668 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1669 1670 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1671 the figure below we depict these three local rows and all columns (0-11). 1672 1673 .vb 1674 0 1 2 3 4 5 6 7 8 9 10 11 1675 ------------------- 1676 row 3 | o o o d d d o o o o o o 1677 row 4 | o o o d d d o o o o o o 1678 row 5 | o o o d d d o o o o o o 1679 ------------------- 1680 .ve 1681 1682 Thus, any entries in the d locations are stored in the d (diagonal) 1683 submatrix, and any entries in the o locations are stored in the 1684 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1685 stored simply in the MATSEQAIJ format for compressed row storage. 1686 1687 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1688 and o_nz should indicate the number of nonzeros per row in the o matrix. 1689 In general, for PDE problems in which most nonzeros are near the diagonal, 1690 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1691 or you will get TERRIBLE performance; see the users' manual chapter on 1692 matrices.
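   Example:
   The following is an illustrative sketch of creating and assembling a matrix with
   this routine. The preallocation figures (3 nonzeros per row in the diagonal block,
   2 in the off-diagonal block) are assumptions for a problem with mostly near-diagonal
   coupling, not values prescribed by PETSc; M and N are global dimensions chosen by
   the caller.

.vb
      Mat  A;
      int  ierr, i, rstart, rend;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                             3,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
         /* insert the entries of locally owned row i with MatSetValues() */
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
.ve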
1693 1694 .keywords: matrix, aij, compressed row, sparse, parallel 1695 1696 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1697 @*/ 1698 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1699 { 1700 Mat B; 1701 Mat_MPIAIJ *b; 1702 int ierr, i,sum[2],work[2],size; 1703 1704 PetscFunctionBegin; 1705 MPI_Comm_size(comm,&size); 1706 if (size == 1) { 1707 if (M == PETSC_DECIDE) M = m; 1708 if (N == PETSC_DECIDE) N = n; 1709 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1710 PetscFunctionReturn(0); 1711 } 1712 1713 *A = 0; 1714 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1715 PLogObjectCreate(B); 1716 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1717 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1718 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1719 B->ops->destroy = MatDestroy_MPIAIJ; 1720 B->ops->view = MatView_MPIAIJ; 1721 B->factor = 0; 1722 B->assembled = PETSC_FALSE; 1723 B->mapping = 0; 1724 1725 B->insertmode = NOT_SET_VALUES; 1726 b->size = size; 1727 MPI_Comm_rank(comm,&b->rank); 1728 1729 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1730 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1731 } 1732 1733 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1734 work[0] = m; work[1] = n; 1735 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1736 if (M == PETSC_DECIDE) M = sum[0]; 1737 if (N == PETSC_DECIDE) N = sum[1]; 1738 } 1739 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1740 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1741 b->m = m; B->m = m; 1742 b->n = n; B->n = n; 1743 b->N = N; B->N = N; 1744 b->M = M; B->M = M; 1745 1746 /* build local table of row and column ownerships */ 1747 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1748 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1749 b->cowners = b->rowners + b->size + 2; 1750 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1751 b->rowners[0] = 0; 1752 for ( i=2; i<=b->size; i++ ) { 1753 b->rowners[i] += b->rowners[i-1]; 1754 } 1755 b->rstart = b->rowners[b->rank]; 1756 b->rend = b->rowners[b->rank+1]; 1757 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1758 b->cowners[0] = 0; 1759 for ( i=2; i<=b->size; i++ ) { 1760 b->cowners[i] += b->cowners[i-1]; 1761 } 1762 b->cstart = b->cowners[b->rank]; 1763 b->cend = b->cowners[b->rank+1]; 1764 1765 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1766 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1767 PLogObjectParent(B,b->A); 1768 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1769 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1770 PLogObjectParent(B,b->B); 1771 1772 /* build cache for off array entries formed */ 1773 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1774 b->donotstash = 0; 1775 b->colmap = 0; 1776 b->garray = 0; 1777 b->roworiented = 1; 1778 1779 /* stuff used for matrix vector multiply */ 1780 b->lvec = 0; 1781 b->Mvctx = 0; 1782 1783 /* stuff for MatGetRow() */ 1784 b->rowindices = 0; 1785 b->rowvalues = 0; 1786 b->getrowactive = PETSC_FALSE; 1787 1788 *A = B; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNC__ 1793 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1794 int MatConvertSameType_MPIAIJ(Mat matin,Mat 
*newmat,int cpvalues) 1795 { 1796 Mat mat; 1797 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1798 int ierr, len=0, flg; 1799 1800 PetscFunctionBegin; 1801 *newmat = 0; 1802 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1803 PLogObjectCreate(mat); 1804 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1805 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1806 mat->ops->destroy = MatDestroy_MPIAIJ; 1807 mat->ops->view = MatView_MPIAIJ; 1808 mat->factor = matin->factor; 1809 mat->assembled = PETSC_TRUE; 1810 1811 a->m = mat->m = oldmat->m; 1812 a->n = mat->n = oldmat->n; 1813 a->M = mat->M = oldmat->M; 1814 a->N = mat->N = oldmat->N; 1815 1816 a->rstart = oldmat->rstart; 1817 a->rend = oldmat->rend; 1818 a->cstart = oldmat->cstart; 1819 a->cend = oldmat->cend; 1820 a->size = oldmat->size; 1821 a->rank = oldmat->rank; 1822 mat->insertmode = NOT_SET_VALUES; 1823 a->rowvalues = 0; 1824 a->getrowactive = PETSC_FALSE; 1825 1826 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1827 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1828 a->cowners = a->rowners + a->size + 2; 1829 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1830 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1831 if (oldmat->colmap) { 1832 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1833 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1834 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1835 } else a->colmap = 0; 1836 if (oldmat->garray) { 1837 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1838 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1839 PLogObjectMemory(mat,len*sizeof(int)); 1840 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1841 } else a->garray = 0; 1842 1843 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1844 PLogObjectParent(mat,a->lvec); 1845 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1846 PLogObjectParent(mat,a->Mvctx); 1847 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1848 PLogObjectParent(mat,a->A); 1849 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1850 PLogObjectParent(mat,a->B); 1851 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1852 if (flg) { 1853 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1854 } 1855 *newmat = mat; 1856 PetscFunctionReturn(0); 1857 } 1858 1859 #include "sys.h" 1860 1861 #undef __FUNC__ 1862 #define __FUNC__ "MatLoad_MPIAIJ" 1863 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1864 { 1865 Mat A; 1866 Scalar *vals,*svals; 1867 MPI_Comm comm = ((PetscObject)viewer)->comm; 1868 MPI_Status status; 1869 int i, nz, ierr, j,rstart, rend, fd; 1870 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1871 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1872 int tag = ((PetscObject)viewer)->tag; 1873 1874 PetscFunctionBegin; 1875 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1876 if (!rank) { 1877 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1878 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1879 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1880 if (header[3] < 0) { 1881 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1882 } 1883 } 1884 1885 ierr = 
MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1886 M = header[1]; N = header[2]; 1887 /* determine ownership of all rows */ 1888 m = M/size + ((M % size) > rank); 1889 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1890 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1891 rowners[0] = 0; 1892 for ( i=2; i<=size; i++ ) { 1893 rowners[i] += rowners[i-1]; 1894 } 1895 rstart = rowners[rank]; 1896 rend = rowners[rank+1]; 1897 1898 /* distribute row lengths to all processors */ 1899 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1900 offlens = ourlens + (rend-rstart); 1901 if (!rank) { 1902 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1903 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1904 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1905 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1906 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1907 PetscFree(sndcounts); 1908 } else { 1909 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1910 } 1911 1912 if (!rank) { 1913 /* calculate the number of nonzeros on each processor */ 1914 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1915 PetscMemzero(procsnz,size*sizeof(int)); 1916 for ( i=0; i<size; i++ ) { 1917 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1918 procsnz[i] += rowlengths[j]; 1919 } 1920 } 1921 PetscFree(rowlengths); 1922 1923 /* determine max buffer needed and allocate it */ 1924 maxnz = 0; 1925 for ( i=0; i<size; i++ ) { 1926 maxnz = PetscMax(maxnz,procsnz[i]); 1927 } 1928 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1929 1930 /* read in my part of the matrix column indices */ 1931 nz = procsnz[0]; 1932 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1933 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1934 1935 /* read in every one elses and ship off */ 1936 for ( i=1; i<size; i++ ) { 1937 nz = procsnz[i]; 1938 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1939 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1940 } 1941 PetscFree(cols); 1942 } else { 1943 /* determine buffer space needed for message */ 1944 nz = 0; 1945 for ( i=0; i<m; i++ ) { 1946 nz += ourlens[i]; 1947 } 1948 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1949 1950 /* receive message of column indices*/ 1951 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1952 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1953 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1954 } 1955 1956 /* loop over local rows, determining number of off diagonal entries */ 1957 PetscMemzero(offlens,m*sizeof(int)); 1958 jj = 0; 1959 for ( i=0; i<m; i++ ) { 1960 for ( j=0; j<ourlens[i]; j++ ) { 1961 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1962 jj++; 1963 } 1964 } 1965 1966 /* create our matrix */ 1967 for ( i=0; i<m; i++ ) { 1968 ourlens[i] -= offlens[i]; 1969 } 1970 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1971 A = *newmat; 1972 MatSetOption(A,MAT_COLUMNS_SORTED); 1973 for ( i=0; i<m; i++ ) { 1974 ourlens[i] += offlens[i]; 1975 } 1976 1977 if (!rank) { 1978 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1979 1980 /* read in my part of the matrix 
numerical values */ 1981 nz = procsnz[0]; 1982 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1983 1984 /* insert into matrix */ 1985 jj = rstart; 1986 smycols = mycols; 1987 svals = vals; 1988 for ( i=0; i<m; i++ ) { 1989 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1990 smycols += ourlens[i]; 1991 svals += ourlens[i]; 1992 jj++; 1993 } 1994 1995 /* read in the values belonging to the other processors and ship them out */ 1996 for ( i=1; i<size; i++ ) { 1997 nz = procsnz[i]; 1998 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1999 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2000 } 2001 PetscFree(procsnz); 2002 } else { 2003 /* receive numeric values */ 2004 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2005 2006 /* receive message of values */ 2007 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2008 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2009 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2010 2011 /* insert into matrix */ 2012 jj = rstart; 2013 smycols = mycols; 2014 svals = vals; 2015 for ( i=0; i<m; i++ ) { 2016 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2017 smycols += ourlens[i]; 2018 svals += ourlens[i]; 2019 jj++; 2020 } 2021 } 2022 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2023 2024 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2025 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNC__ 2030 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2031 /* 2032 Not great since it makes two copies of the submatrix: first a sequential AIJ matrix 2033 on each processor, and then the final parallel matrix built by concatenating the local pieces.
2034 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2035 */ 2036 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 2037 { 2038 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2039 Mat *local,M, Mreuse; 2040 Scalar *vwork,*aa; 2041 MPI_Comm comm = mat->comm; 2042 Mat_SeqAIJ *aij; 2043 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2044 2045 PetscFunctionBegin; 2046 MPI_Comm_rank(comm,&rank); 2047 MPI_Comm_size(comm,&size); 2048 2049 if (call == MAT_REUSE_MATRIX) { 2050 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2051 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2052 local = &Mreuse; 2053 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2054 } else { 2055 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2056 Mreuse = *local; 2057 PetscFree(local); 2058 } 2059 2060 /* 2061 m - number of local rows 2062 n - number of columns (same on all processors) 2063 rstart - first row in new global matrix generated 2064 */ 2065 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2066 if (call == MAT_INITIAL_MATRIX) { 2067 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2068 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2069 ii = aij->i; 2070 jj = aij->j; 2071 2072 /* 2073 Determine the number of non-zeros in the diagonal and off-diagonal 2074 portions of the matrix in order to do correct preallocation 2075 */ 2076 2077 /* first get start and end of "diagonal" columns */ 2078 if (csize == PETSC_DECIDE) { 2079 nlocal = n/size + ((n % size) > rank); 2080 } else { 2081 nlocal = csize; 2082 } 2083 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2084 rstart = rend - nlocal; 2085 if (rank == size - 1 && rend != n) { 2086 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2087 } 2088 2089 /* next, compute all the lengths */ 2090 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2091 olens = dlens + m; 2092 for ( i=0; i<m; i++ ) { 2093 jend = ii[i+1] - ii[i]; 2094 olen = 0; 2095 dlen = 0; 2096 for ( j=0; j<jend; j++ ) { 2097 if ( *jj < rstart || *jj >= rend) olen++; 2098 else dlen++; 2099 jj++; 2100 } 2101 olens[i] = olen; 2102 dlens[i] = dlen; 2103 } 2104 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2105 PetscFree(dlens); 2106 } else { 2107 int ml,nl; 2108 2109 M = *newmat; 2110 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2111 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2112 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2113 /* 2114 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2115 rather than the slower MatSetValues(). 
2116 */ 2117 M->was_assembled = PETSC_TRUE; 2118 M->assembled = PETSC_FALSE; 2119 } 2120 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2121 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2122 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2123 ii = aij->i; 2124 jj = aij->j; 2125 aa = aij->a; 2126 for (i=0; i<m; i++) { 2127 row = rstart + i; 2128 nz = ii[i+1] - ii[i]; 2129 cwork = jj; jj += nz; 2130 vwork = aa; aa += nz; 2131 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2132 } 2133 2134 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2135 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2136 *newmat = M; 2137 2138 /* save submatrix used in processor for next request */ 2139 if (call == MAT_INITIAL_MATRIX) { 2140 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2141 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2142 } 2143 2144 PetscFunctionReturn(0); 2145 } 2146
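
/*
   Illustrative usage sketch (kept in a comment, not compiled): how the caching and
   reuse logic in MatGetSubMatrix_MPIAIJ() above is intended to be driven from
   application code. It assumes that the public MatGetSubMatrix() wrapper takes the
   same argument list as the routine above and that isrow/iscol describe the same
   rows and columns on every call; check the users manual before relying on either
   assumption.

      Mat mat, sub;
      IS  isrow, iscol;
      int ierr;

      ... assemble the parallel MPIAIJ matrix mat and build isrow/iscol ...

      (first call: extracts the submatrix and composes the cached sequential
       piece on sub under the name "SubMatrix")
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);

      (later calls with the same index sets and layout: the cached piece is reused,
       so only the numerical values are extracted and inserted again)
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub); CHKERRQ(ierr);
*/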