1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.251 1998/06/19 15:56:02 bsmith Exp balay $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } else { 282 if (!aij->colmap) { 283 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 col = aij->colmap[idxn[j]] - 1; 286 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 287 else { 288 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 289 } 290 } 291 } 292 } else { 293 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 294 } 295 } 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNC__ 300 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 301 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 304 MPI_Comm comm = mat->comm; 305 int size = aij->size, *owners = aij->rowners; 306 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 307 MPI_Request *send_waits,*recv_waits; 308 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 309 InsertMode addv; 310 Scalar *rvalues,*svalues; 311 312 PetscFunctionBegin; 313 if (aij->donotstash) { 314 aij->svalues = 0; aij->rvalues = 0; 315 aij->nsends = 0; aij->nrecvs = 0; 316 aij->send_waits = 0; aij->recv_waits = 0; 317 aij->rmax = 0; 318 PetscFunctionReturn(0); 319 } 320 321 /* make sure all processors are either in INSERTMODE or ADDMODE */ 322 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 323 if (addv == (ADD_VALUES|INSERT_VALUES)) { 324 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 325 } 326 mat->insertmode = addv; /* in case this processor had no cache */ 327 328 /* first count number of contributors to each processor */ 329 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 330 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 331 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 332 for ( i=0; i<aij->stash.n; i++ ) { 333 idx = aij->stash.idx[i]; 334 for ( j=0; j<size; j++ ) { 335 if (idx >= owners[j] && idx < owners[j+1]) { 336 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 337 } 338 } 339 } 340 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 341 342 /* inform other processors of number of messages and max length*/ 343 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 344 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 345 nreceives = work[rank]; 346 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 347 nmax = work[rank]; 348 PetscFree(work); 349 350 /* post receives: 351 1) each message will consist of ordered pairs 352 (global index,value) we store the global index as a double 353 to simplify the message passing. 354 2) since we don't know how long each individual message is we 355 allocate the largest needed buffer for each receive. Potentially 356 this is a lot of wasted space. 357 358 359 This could be done better. 360 */ 361 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 362 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 363 for ( i=0; i<nreceives; i++ ) { 364 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 365 comm,recv_waits+i);CHKERRQ(ierr); 366 } 367 368 /* do sends: 369 1) starts[i] gives the starting index in svalues for stuff going to 370 the ith processor 371 */ 372 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 373 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 374 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 375 starts[0] = 0; 376 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 377 for ( i=0; i<aij->stash.n; i++ ) { 378 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 379 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 380 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 381 } 382 PetscFree(owner); 383 starts[0] = 0; 384 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 385 count = 0; 386 for ( i=0; i<size; i++ ) { 387 if (procs[i]) { 388 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 389 } 390 } 391 PetscFree(starts); 392 PetscFree(nprocs); 393 394 /* Free cache space */ 395 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 396 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 397 398 aij->svalues = svalues; aij->rvalues = rvalues; 399 aij->nsends = nsends; aij->nrecvs = nreceives; 400 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 401 aij->rmax = nmax; 402 403 PetscFunctionReturn(0); 404 } 405 extern int MatSetUpMultiply_MPIAIJ(Mat); 406 407 #undef __FUNC__ 408 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 409 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 412 MPI_Status *send_status,recv_status; 413 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 414 int row,col,other_disassembled; 415 Scalar *values,val; 416 InsertMode addv = mat->insertmode; 417 418 PetscFunctionBegin; 419 420 /* wait on receives */ 421 while (count) { 422 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 423 /* unpack receives into our local space */ 424 values = aij->rvalues + 3*imdex*aij->rmax; 425 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 426 n = n/3; 427 for ( i=0; i<n; i++ ) { 428 row = (int) PetscReal(values[3*i]) - aij->rstart; 429 col = (int) PetscReal(values[3*i+1]); 430 val = values[3*i+2]; 431 if (col >= aij->cstart && col < aij->cend) { 432 col -= aij->cstart; 433 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 434 } else { 435 if (mat->was_assembled || mat->assembled) { 436 if (!aij->colmap) { 437 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 438 } 439 col = aij->colmap[col] - 1; 440 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 441 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 442 col = (int) PetscReal(values[3*i+1]); 443 } 444 } 445 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 446 } 447 } 448 count--; 449 } 450 if (aij->recv_waits) PetscFree(aij->recv_waits); 451 if (aij->rvalues) PetscFree(aij->rvalues); 452 453 /* wait on sends */ 454 if (aij->nsends) { 455 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 456 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 457 PetscFree(send_status); 458 } 459 if (aij->send_waits) PetscFree(aij->send_waits); 460 if (aij->svalues) PetscFree(aij->svalues); 461 462 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 463 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 464 465 /* determine if any processor has disassembled, if so we must 466 also disassemble ourselfs, in order that we may reassemble. */ 467 /* 468 if nonzero structure of submatrix B cannot change then we know that 469 no processor disassembled thus we can skip this stuff 470 */ 471 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 472 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 473 if (mat->was_assembled && !other_disassembled) { 474 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 475 } 476 } 477 478 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 479 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 480 } 481 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 482 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 483 484 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ "MatZeroEntries_MPIAIJ" 490 int MatZeroEntries_MPIAIJ(Mat A) 491 { 492 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 493 int ierr; 494 495 PetscFunctionBegin; 496 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 497 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNC__ 502 #define __FUNC__ "MatZeroRows_MPIAIJ" 503 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 504 { 505 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 506 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 507 int *procs,*nprocs,j,found,idx,nsends,*work,row; 508 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 509 int *rvalues,tag = A->tag,count,base,slen,n,*source; 510 int *lens,imdex,*lrows,*values,rstart=l->rstart; 511 MPI_Comm comm = A->comm; 512 MPI_Request *send_waits,*recv_waits; 513 MPI_Status recv_status,*send_status; 514 IS istmp; 515 PetscTruth localdiag; 516 517 PetscFunctionBegin; 518 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 519 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 520 521 /* first count number of contributors to each processor */ 522 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 523 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 524 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 525 for ( i=0; i<N; i++ ) { 526 idx = rows[i]; 527 found = 0; 528 for ( j=0; j<size; j++ ) { 529 if (idx >= owners[j] && idx < owners[j+1]) { 530 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 531 } 532 } 533 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 534 } 535 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 536 537 /* inform other processors of number of messages and max length*/ 538 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 539 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 540 nrecvs = work[rank]; 541 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 542 nmax = work[rank]; 543 PetscFree(work); 544 545 /* post receives: */ 546 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 547 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 548 for ( i=0; i<nrecvs; i++ ) { 549 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 550 } 551 552 /* do sends: 553 1) starts[i] gives the starting index in svalues for stuff going to 554 the ith processor 555 */ 556 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 557 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 558 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 559 starts[0] = 0; 560 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 561 for ( i=0; i<N; i++ ) { 562 svalues[starts[owner[i]]++] = rows[i]; 563 } 564 ISRestoreIndices(is,&rows); 565 566 starts[0] = 0; 567 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 568 count = 0; 569 for ( i=0; i<size; i++ ) { 570 if (procs[i]) { 571 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 572 } 573 } 574 PetscFree(starts); 575 576 base = owners[rank]; 577 578 /* wait on receives */ 579 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 580 source = lens + nrecvs; 581 count = nrecvs; slen = 0; 582 while (count) { 583 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 584 /* unpack receives into our local space */ 585 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 586 source[imdex] = recv_status.MPI_SOURCE; 587 lens[imdex] = n; 588 slen += n; 589 count--; 590 } 591 PetscFree(recv_waits); 592 593 /* move the data into the send scatter */ 594 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 595 count = 0; 596 for ( i=0; i<nrecvs; i++ ) { 597 values = rvalues + i*nmax; 598 for ( j=0; j<lens[i]; j++ ) { 599 lrows[count++] = values[j] - base; 600 } 601 } 602 PetscFree(rvalues); PetscFree(lens); 603 PetscFree(owner); PetscFree(nprocs); 604 605 /* actually zap the local rows */ 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 607 PLogObjectParent(A,istmp); 608 609 /* 610 Zero the required rows. If the "diagonal block" of the matrix 611 is square and the user wishes to set the diagonal we use seperate 612 code so that MatSetValues() is not called for each diagonal allocating 613 new memory, thus calling lots of mallocs and slowing things down. 614 615 Contributed by: Mathew Knepley 616 */ 617 localdiag = PETSC_FALSE; 618 if (diag && (l->A->M == l->A->N)) { 619 localdiag = PETSC_TRUE; 620 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 621 } else { 622 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 623 } 624 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 625 ierr = ISDestroy(istmp); CHKERRQ(ierr); 626 627 if (diag && (localdiag == PETSC_FALSE)) { 628 for ( i = 0; i < slen; i++ ) { 629 row = lrows[i] + rstart; 630 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 631 } 632 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 633 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 634 } 635 636 if (diag) { 637 for ( i = 0; i < slen; i++ ) { 638 row = lrows[i] + rstart; 639 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 640 } 641 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 642 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 643 } 644 PetscFree(lrows); 645 646 /* wait on sends */ 647 if (nsends) { 648 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 649 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 650 PetscFree(send_status); 651 } 652 PetscFree(send_waits); PetscFree(svalues); 653 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNC__ 658 #define __FUNC__ "MatMult_MPIAIJ" 659 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 660 { 661 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 662 int ierr,nt; 663 664 PetscFunctionBegin; 665 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 666 if (nt != a->n) { 667 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 668 } 669 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 670 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 671 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 672 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNC__ 677 #define __FUNC__ "MatMultAdd_MPIAIJ" 678 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 679 { 680 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 681 int ierr; 682 683 PetscFunctionBegin; 684 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 685 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 686 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 687 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNC__ 692 #define __FUNC__ "MatMultTrans_MPIAIJ" 693 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696 int ierr; 697 698 PetscFunctionBegin; 699 /* do nondiagonal part */ 700 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 701 /* send it on its way */ 702 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 703 /* do local part */ 704 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 705 /* receive remote parts: note this assumes the values are not actually */ 706 /* inserted in yy until the next line, which is true for my implementation*/ 707 /* but is not perhaps always true. */ 708 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 714 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 717 int ierr; 718 719 PetscFunctionBegin; 720 /* do nondiagonal part */ 721 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 722 /* send it on its way */ 723 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 724 /* do local part */ 725 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 726 /* receive remote parts: note this assumes the values are not actually */ 727 /* inserted in yy until the next line, which is true for my implementation*/ 728 /* but is not perhaps always true. */ 729 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 /* 734 This only works correctly for square matrices where the subblock A->A is the 735 diagonal block 736 */ 737 #undef __FUNC__ 738 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 739 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 740 { 741 int ierr; 742 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 743 744 PetscFunctionBegin; 745 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 746 if (a->rstart != a->cstart || a->rend != a->cend) { 747 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 748 } 749 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNC__ 754 #define __FUNC__ "MatScale_MPIAIJ" 755 int MatScale_MPIAIJ(Scalar *aa,Mat A) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 758 int ierr; 759 760 PetscFunctionBegin; 761 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 762 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNC__ 767 #define __FUNC__ "MatDestroy_MPIAIJ" 768 int MatDestroy_MPIAIJ(Mat mat) 769 { 770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 771 int ierr; 772 773 PetscFunctionBegin; 774 if (--mat->refct > 0) PetscFunctionReturn(0); 775 776 if (mat->mapping) { 777 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 778 } 779 if (mat->bmapping) { 780 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 781 } 782 #if defined(USE_PETSC_LOG) 783 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 784 #endif 785 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 786 PetscFree(aij->rowners); 787 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 788 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 789 if (aij->colmap) PetscFree(aij->colmap); 790 if (aij->garray) PetscFree(aij->garray); 791 if (aij->lvec) VecDestroy(aij->lvec); 792 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 793 if (aij->rowvalues) PetscFree(aij->rowvalues); 794 PetscFree(aij); 795 PLogObjectDestroy(mat); 796 PetscHeaderDestroy(mat); 797 PetscFunctionReturn(0); 798 } 799 800 #undef __FUNC__ 801 #define __FUNC__ "MatView_MPIAIJ_Binary" 802 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 803 { 804 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 805 int ierr; 806 807 PetscFunctionBegin; 808 if (aij->size == 1) { 809 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 810 } 811 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNC__ 816 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 817 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 818 { 819 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 820 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 821 int ierr, format,shift = C->indexshift,rank; 822 FILE *fd; 823 ViewerType vtype; 824 825 PetscFunctionBegin; 826 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 827 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 828 ierr = ViewerGetFormat(viewer,&format); 829 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 830 MatInfo info; 831 int flg; 832 MPI_Comm_rank(mat->comm,&rank); 833 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 834 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 835 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 836 PetscSequentialPhaseBegin(mat->comm,1); 837 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 838 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 839 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 840 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 841 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 842 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 843 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 844 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 845 fflush(fd); 846 PetscSequentialPhaseEnd(mat->comm,1); 847 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 850 PetscFunctionReturn(0); 851 } 852 } 853 854 if (vtype == DRAW_VIEWER) { 855 Draw draw; 856 PetscTruth isnull; 857 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 858 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 859 } 860 861 if (vtype == ASCII_FILE_VIEWER) { 862 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 863 PetscSequentialPhaseBegin(mat->comm,1); 864 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 865 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 866 aij->cend); 867 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 868 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 869 fflush(fd); 870 PetscSequentialPhaseEnd(mat->comm,1); 871 } else { 872 int size = aij->size; 873 rank = aij->rank; 874 if (size == 1) { 875 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 876 } else { 877 /* assemble the entire matrix onto first processor. */ 878 Mat A; 879 Mat_SeqAIJ *Aloc; 880 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 881 Scalar *a; 882 883 if (!rank) { 884 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 885 } else { 886 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 887 } 888 PLogObjectParent(mat,A); 889 890 /* copy over the A part */ 891 Aloc = (Mat_SeqAIJ*) aij->A->data; 892 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 893 row = aij->rstart; 894 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 895 for ( i=0; i<m; i++ ) { 896 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 897 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 898 } 899 aj = Aloc->j; 900 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 901 902 /* copy over the B part */ 903 Aloc = (Mat_SeqAIJ*) aij->B->data; 904 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 905 row = aij->rstart; 906 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 907 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 908 for ( i=0; i<m; i++ ) { 909 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 910 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 911 } 912 PetscFree(ct); 913 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 914 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 915 /* 916 Everyone has to call to draw the matrix since the graphics waits are 917 synchronized across all processors that share the Draw object 918 */ 919 if (!rank || vtype == DRAW_VIEWER) { 920 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 921 } 922 ierr = MatDestroy(A); CHKERRQ(ierr); 923 } 924 } 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNC__ 929 #define __FUNC__ "MatView_MPIAIJ" 930 int MatView_MPIAIJ(Mat mat,Viewer viewer) 931 { 932 int ierr; 933 ViewerType vtype; 934 935 PetscFunctionBegin; 936 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 937 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 938 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 939 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 940 } else if (vtype == BINARY_FILE_VIEWER) { 941 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 942 } else { 943 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 944 } 945 PetscFunctionReturn(0); 946 } 947 948 /* 949 This has to provide several versions. 950 951 2) a) use only local smoothing updating outer values only once. 952 b) local smoothing updating outer values each inner iteration 953 3) color updating out values betwen colors. 954 */ 955 #undef __FUNC__ 956 #define __FUNC__ "MatRelax_MPIAIJ" 957 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 958 double fshift,int its,Vec xx) 959 { 960 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 961 Mat AA = mat->A, BB = mat->B; 962 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 963 Scalar *b,*x,*xs,*ls,d,*v,sum; 964 int ierr,*idx, *diag; 965 int n = mat->n, m = mat->m, i,shift = A->indexshift; 966 967 PetscFunctionBegin; 968 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 969 xs = x + shift; /* shift by one for index start of 1 */ 970 ls = ls + shift; 971 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 972 diag = A->diag; 973 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 974 if (flag & SOR_ZERO_INITIAL_GUESS) { 975 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 979 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 980 while (its--) { 981 /* go down through the rows */ 982 for ( i=0; i<m; i++ ) { 983 n = A->i[i+1] - A->i[i]; 984 idx = A->j + A->i[i] + shift; 985 v = A->a + A->i[i] + shift; 986 sum = b[i]; 987 SPARSEDENSEMDOT(sum,xs,v,idx,n); 988 d = fshift + A->a[diag[i]+shift]; 989 n = B->i[i+1] - B->i[i]; 990 idx = B->j + B->i[i] + shift; 991 v = B->a + B->i[i] + shift; 992 SPARSEDENSEMDOT(sum,ls,v,idx,n); 993 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 994 } 995 /* come up through the rows */ 996 for ( i=m-1; i>-1; i-- ) { 997 n = A->i[i+1] - A->i[i]; 998 idx = A->j + A->i[i] + shift; 999 v = A->a + A->i[i] + shift; 1000 sum = b[i]; 1001 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1002 d = fshift + A->a[diag[i]+shift]; 1003 n = B->i[i+1] - B->i[i]; 1004 idx = B->j + B->i[i] + shift; 1005 v = B->a + B->i[i] + shift; 1006 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1007 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1008 } 1009 } 1010 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1011 if (flag & SOR_ZERO_INITIAL_GUESS) { 1012 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1016 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1017 while (its--) { 1018 for ( i=0; i<m; i++ ) { 1019 n = A->i[i+1] - A->i[i]; 1020 idx = A->j + A->i[i] + shift; 1021 v = A->a + A->i[i] + shift; 1022 sum = b[i]; 1023 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024 d = fshift + A->a[diag[i]+shift]; 1025 n = B->i[i+1] - B->i[i]; 1026 idx = B->j + B->i[i] + shift; 1027 v = B->a + B->i[i] + shift; 1028 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1029 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030 } 1031 } 1032 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1038 mat->Mvctx); CHKERRQ(ierr); 1039 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1040 mat->Mvctx); CHKERRQ(ierr); 1041 while (its--) { 1042 for ( i=m-1; i>-1; i-- ) { 1043 n = A->i[i+1] - A->i[i]; 1044 idx = A->j + A->i[i] + shift; 1045 v = A->a + A->i[i] + shift; 1046 sum = b[i]; 1047 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1048 d = fshift + A->a[diag[i]+shift]; 1049 n = B->i[i+1] - B->i[i]; 1050 idx = B->j + B->i[i] + shift; 1051 v = B->a + B->i[i] + shift; 1052 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1053 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1054 } 1055 } 1056 } else { 1057 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1058 } 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNC__ 1063 #define __FUNC__ "MatGetInfo_MPIAIJ" 1064 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1065 { 1066 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1067 Mat A = mat->A, B = mat->B; 1068 int ierr; 1069 double isend[5], irecv[5]; 1070 1071 PetscFunctionBegin; 1072 info->block_size = 1.0; 1073 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1074 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1075 isend[3] = info->memory; isend[4] = info->mallocs; 1076 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1077 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1078 isend[3] += info->memory; isend[4] += info->mallocs; 1079 if (flag == MAT_LOCAL) { 1080 info->nz_used = isend[0]; 1081 info->nz_allocated = isend[1]; 1082 info->nz_unneeded = isend[2]; 1083 info->memory = isend[3]; 1084 info->mallocs = isend[4]; 1085 } else if (flag == MAT_GLOBAL_MAX) { 1086 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1087 info->nz_used = irecv[0]; 1088 info->nz_allocated = irecv[1]; 1089 info->nz_unneeded = irecv[2]; 1090 info->memory = irecv[3]; 1091 info->mallocs = irecv[4]; 1092 } else if (flag == MAT_GLOBAL_SUM) { 1093 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1094 info->nz_used = irecv[0]; 1095 info->nz_allocated = irecv[1]; 1096 info->nz_unneeded = irecv[2]; 1097 info->memory = irecv[3]; 1098 info->mallocs = irecv[4]; 1099 } 1100 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1101 info->fill_ratio_needed = 0; 1102 info->factor_mallocs = 0; 1103 info->rows_global = (double)mat->M; 1104 info->columns_global = (double)mat->N; 1105 info->rows_local = (double)mat->m; 1106 info->columns_local = (double)mat->N; 1107 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNC__ 1112 #define __FUNC__ "MatSetOption_MPIAIJ" 1113 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1114 { 1115 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1116 1117 PetscFunctionBegin; 1118 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1119 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1120 op == MAT_COLUMNS_UNSORTED || 1121 op == MAT_COLUMNS_SORTED || 1122 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1123 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1124 MatSetOption(a->A,op); 1125 MatSetOption(a->B,op); 1126 } else if (op == MAT_ROW_ORIENTED) { 1127 a->roworiented = 1; 1128 MatSetOption(a->A,op); 1129 MatSetOption(a->B,op); 1130 } else if (op == MAT_ROWS_SORTED || 1131 op == MAT_ROWS_UNSORTED || 1132 op == MAT_SYMMETRIC || 1133 op == MAT_STRUCTURALLY_SYMMETRIC || 1134 op == MAT_YES_NEW_DIAGONALS) 1135 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1136 else if (op == MAT_COLUMN_ORIENTED) { 1137 a->roworiented = 0; 1138 MatSetOption(a->A,op); 1139 MatSetOption(a->B,op); 1140 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1141 a->donotstash = 1; 1142 } else if (op == MAT_NO_NEW_DIAGONALS){ 1143 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1144 } else { 1145 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1146 } 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNC__ 1151 #define __FUNC__ "MatGetSize_MPIAIJ" 1152 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1153 { 1154 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1155 1156 PetscFunctionBegin; 1157 if (m) *m = mat->M; 1158 if (n) *n = mat->N; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNC__ 1163 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1164 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1165 { 1166 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1167 1168 PetscFunctionBegin; 1169 if (m) *m = mat->m; 1170 if (n) *n = mat->n; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNC__ 1175 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1176 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1177 { 1178 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1179 1180 PetscFunctionBegin; 1181 *m = mat->rstart; *n = mat->rend; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1186 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1187 1188 #undef __FUNC__ 1189 #define __FUNC__ "MatGetRow_MPIAIJ" 1190 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1191 { 1192 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1193 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1194 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1195 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1196 int *cmap, *idx_p; 1197 1198 PetscFunctionBegin; 1199 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1200 mat->getrowactive = PETSC_TRUE; 1201 1202 if (!mat->rowvalues && (idx || v)) { 1203 /* 1204 allocate enough space to hold information from the longest row. 1205 */ 1206 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1207 int max = 1,m = mat->m,tmp; 1208 for ( i=0; i<m; i++ ) { 1209 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1210 if (max < tmp) { max = tmp; } 1211 } 1212 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1213 CHKPTRQ(mat->rowvalues); 1214 mat->rowindices = (int *) (mat->rowvalues + max); 1215 } 1216 1217 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1218 lrow = row - rstart; 1219 1220 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1221 if (!v) {pvA = 0; pvB = 0;} 1222 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1223 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1224 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1225 nztot = nzA + nzB; 1226 1227 cmap = mat->garray; 1228 if (v || idx) { 1229 if (nztot) { 1230 /* Sort by increasing column numbers, assuming A and B already sorted */ 1231 int imark = -1; 1232 if (v) { 1233 *v = v_p = mat->rowvalues; 1234 for ( i=0; i<nzB; i++ ) { 1235 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1236 else break; 1237 } 1238 imark = i; 1239 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1240 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1241 } 1242 if (idx) { 1243 *idx = idx_p = mat->rowindices; 1244 if (imark > -1) { 1245 for ( i=0; i<imark; i++ ) { 1246 idx_p[i] = cmap[cworkB[i]]; 1247 } 1248 } else { 1249 for ( i=0; i<nzB; i++ ) { 1250 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1251 else break; 1252 } 1253 imark = i; 1254 } 1255 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1256 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1257 } 1258 } 1259 else { 1260 if (idx) *idx = 0; 1261 if (v) *v = 0; 1262 } 1263 } 1264 *nz = nztot; 1265 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1266 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNC__ 1271 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1272 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1273 { 1274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1275 1276 PetscFunctionBegin; 1277 if (aij->getrowactive == PETSC_FALSE) { 1278 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1279 } 1280 aij->getrowactive = PETSC_FALSE; 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #undef __FUNC__ 1285 #define __FUNC__ "MatNorm_MPIAIJ" 1286 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1287 { 1288 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1289 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1290 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1291 double sum = 0.0; 1292 Scalar *v; 1293 1294 PetscFunctionBegin; 1295 if (aij->size == 1) { 1296 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1297 } else { 1298 if (type == NORM_FROBENIUS) { 1299 v = amat->a; 1300 for (i=0; i<amat->nz; i++ ) { 1301 #if defined(USE_PETSC_COMPLEX) 1302 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1303 #else 1304 sum += (*v)*(*v); v++; 1305 #endif 1306 } 1307 v = bmat->a; 1308 for (i=0; i<bmat->nz; i++ ) { 1309 #if defined(USE_PETSC_COMPLEX) 1310 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1311 #else 1312 sum += (*v)*(*v); v++; 1313 #endif 1314 } 1315 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1316 *norm = sqrt(*norm); 1317 } else if (type == NORM_1) { /* max column norm */ 1318 double *tmp, *tmp2; 1319 int *jj, *garray = aij->garray; 1320 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1321 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1322 PetscMemzero(tmp,aij->N*sizeof(double)); 1323 *norm = 0.0; 1324 v = amat->a; jj = amat->j; 1325 for ( j=0; j<amat->nz; j++ ) { 1326 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1327 } 1328 v = bmat->a; jj = bmat->j; 1329 for ( j=0; j<bmat->nz; j++ ) { 1330 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1331 } 1332 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1333 for ( j=0; j<aij->N; j++ ) { 1334 if (tmp2[j] > *norm) *norm = tmp2[j]; 1335 } 1336 PetscFree(tmp); PetscFree(tmp2); 1337 } else if (type == NORM_INFINITY) { /* max row norm */ 1338 double ntemp = 0.0; 1339 for ( j=0; j<amat->m; j++ ) { 1340 v = amat->a + amat->i[j] + shift; 1341 sum = 0.0; 1342 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1343 sum += PetscAbsScalar(*v); v++; 1344 } 1345 v = bmat->a + bmat->i[j] + shift; 1346 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1347 sum += PetscAbsScalar(*v); v++; 1348 } 1349 if (sum > ntemp) ntemp = sum; 1350 } 1351 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1352 } else { 1353 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1354 } 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNC__ 1360 #define __FUNC__ "MatTranspose_MPIAIJ" 1361 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1362 { 1363 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1364 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1365 int ierr,shift = Aloc->indexshift; 1366 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1367 Mat B; 1368 Scalar *array; 1369 1370 PetscFunctionBegin; 1371 if (matout == PETSC_NULL && M != N) { 1372 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1373 } 1374 1375 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1376 1377 /* copy over the A part */ 1378 Aloc = (Mat_SeqAIJ*) a->A->data; 1379 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1380 row = a->rstart; 1381 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1382 for ( i=0; i<m; i++ ) { 1383 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1385 } 1386 aj = Aloc->j; 1387 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1388 1389 /* copy over the B part */ 1390 Aloc = (Mat_SeqAIJ*) a->B->data; 1391 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1392 row = a->rstart; 1393 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1394 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1395 for ( i=0; i<m; i++ ) { 1396 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1397 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1398 } 1399 PetscFree(ct); 1400 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1401 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1402 if (matout != PETSC_NULL) { 1403 *matout = B; 1404 } else { 1405 PetscOps *Abops; 1406 struct _MatOps *Aops; 1407 1408 /* This isn't really an in-place transpose .... but free data structures from a */ 1409 PetscFree(a->rowners); 1410 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1411 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1412 if (a->colmap) PetscFree(a->colmap); 1413 if (a->garray) PetscFree(a->garray); 1414 if (a->lvec) VecDestroy(a->lvec); 1415 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1416 PetscFree(a); 1417 1418 /* 1419 This is horrible, horrible code. We need to keep the 1420 A pointers for the bops and ops but copy everything 1421 else from C. 1422 */ 1423 Abops = A->bops; 1424 Aops = A->ops; 1425 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1426 A->bops = Abops; 1427 A->ops = Aops; 1428 PetscHeaderDestroy(B); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1435 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1436 { 1437 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1438 Mat a = aij->A, b = aij->B; 1439 int ierr,s1,s2,s3; 1440 1441 PetscFunctionBegin; 1442 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1443 if (rr) { 1444 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1445 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1446 /* Overlap communication with computation. */ 1447 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1448 } 1449 if (ll) { 1450 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1451 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1452 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1453 } 1454 /* scale the diagonal block */ 1455 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1456 1457 if (rr) { 1458 /* Do a scatter end and then right scale the off-diagonal block */ 1459 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1460 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1461 } 1462 1463 PetscFunctionReturn(0); 1464 } 1465 1466 1467 extern int MatPrintHelp_SeqAIJ(Mat); 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1470 int MatPrintHelp_MPIAIJ(Mat A) 1471 { 1472 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1473 int ierr; 1474 1475 PetscFunctionBegin; 1476 if (!a->rank) { 1477 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNC__ 1483 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1484 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1485 { 1486 PetscFunctionBegin; 1487 *bs = 1; 1488 PetscFunctionReturn(0); 1489 } 1490 #undef __FUNC__ 1491 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1492 int MatSetUnfactored_MPIAIJ(Mat A) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1495 int ierr; 1496 1497 PetscFunctionBegin; 1498 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNC__ 1503 #define __FUNC__ "MatEqual_MPIAIJ" 1504 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1505 { 1506 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1507 Mat a, b, c, d; 1508 PetscTruth flg; 1509 int ierr; 1510 1511 PetscFunctionBegin; 1512 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1513 a = matA->A; b = matA->B; 1514 c = matB->A; d = matB->B; 1515 1516 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1517 if (flg == PETSC_TRUE) { 1518 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1519 } 1520 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1521 PetscFunctionReturn(0); 1522 } 1523 1524 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1525 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1526 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1527 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1528 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1529 1530 /* -------------------------------------------------------------------*/ 1531 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1532 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1533 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1534 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1535 0,0, 1536 0,0, 1537 0,0, 1538 MatRelax_MPIAIJ, 1539 MatTranspose_MPIAIJ, 1540 MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1541 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1542 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1543 0, 1544 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1545 0,0,0,0, 1546 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1547 0,0, 1548 0,0,MatConvertSameType_MPIAIJ,0,0, 1549 0,0,0, 1550 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1551 MatPrintHelp_MPIAIJ, 1552 MatScale_MPIAIJ,0,0,0, 1553 MatGetBlockSize_MPIAIJ,0,0,0,0, 1554 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ, 1555 0,0,MatGetSubMatrix_MPIAIJ}; 1556 1557 1558 #undef __FUNC__ 1559 #define __FUNC__ "MatCreateMPIAIJ" 1560 /*@C 1561 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1562 (the default parallel PETSc format). For good matrix assembly performance 1563 the user should preallocate the matrix storage by setting the parameters 1564 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1565 performance can be increased by more than a factor of 50. 1566 1567 Collective on MPI_Comm 1568 1569 Input Parameters: 1570 + comm - MPI communicator 1571 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1572 This value should be the same as the local size used in creating the 1573 y vector for the matrix-vector product y = Ax. 1574 . n - This value should be the same as the local size used in creating the 1575 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1576 calculated if N is given) For square matrices n is almost always m. 1577 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1578 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1579 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1580 (same for all local rows) 1581 . d_nzz - array containing the number of nonzeros in the various rows of the 1582 diagonal portion of the local submatrix (possibly different for each row) 1583 or PETSC_NULL. You must leave room for the diagonal entry even if 1584 it is zero. 1585 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1586 submatrix (same for all local rows). 1587 - o_nzz - array containing the number of nonzeros in the various rows of the 1588 off-diagonal portion of the local submatrix (possibly different for 1589 each row) or PETSC_NULL. 1590 1591 Output Parameter: 1592 . A - the matrix 1593 1594 Notes: 1595 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1596 than it must be used on all processors that share the object for that argument. 1597 1598 The AIJ format (also called the Yale sparse matrix format or 1599 compressed row storage), is fully compatible with standard Fortran 77 1600 storage. That is, the stored row and column indices can begin at 1601 either one (as in Fortran) or zero. See the users manual for details. 1602 1603 The user MUST specify either the local or global matrix dimensions 1604 (possibly both). 1605 1606 By default, this format uses inodes (identical nodes) when possible. 1607 We search for consecutive rows with the same nonzero structure, thereby 1608 reusing matrix information to achieve increased efficiency. 1609 1610 Options Database Keys: 1611 + -mat_aij_no_inode - Do not use inodes 1612 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1613 - -mat_aij_oneindex - Internally use indexing starting at 1 1614 rather than 0. Note that when calling MatSetValues(), 1615 the user still MUST index entries starting at 0! 1616 1617 Storage Information: 1618 For a square global matrix we define each processor's diagonal portion 1619 to be its local rows and the corresponding columns (a square submatrix); 1620 each processor's off-diagonal portion encompasses the remainder of the 1621 local matrix (a rectangular submatrix). 1622 1623 The user can specify preallocated storage for the diagonal part of 1624 the local submatrix with either d_nz or d_nnz (not both). Set 1625 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1626 memory allocation. Likewise, specify preallocated storage for the 1627 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1628 1629 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1630 the figure below we depict these three local rows and all columns (0-11). 1631 1632 .vb 1633 0 1 2 3 4 5 6 7 8 9 10 11 1634 ------------------- 1635 row 3 | o o o d d d o o o o o o 1636 row 4 | o o o d d d o o o o o o 1637 row 5 | o o o d d d o o o o o o 1638 ------------------- 1639 .ve 1640 1641 Thus, any entries in the d locations are stored in the d (diagonal) 1642 submatrix, and any entries in the o locations are stored in the 1643 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1644 stored simply in the MATSEQAIJ format for compressed row storage. 1645 1646 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1647 and o_nz should indicate the number of nonzeros per row in the o matrix. 1648 In general, for PDE problems in which most nonzeros are near the diagonal, 1649 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1650 or you will get TERRIBLE performance; see the users' manual chapter on 1651 matrices. 1652 1653 .keywords: matrix, aij, compressed row, sparse, parallel 1654 1655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1656 @*/ 1657 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1658 { 1659 Mat B; 1660 Mat_MPIAIJ *b; 1661 int ierr, i,sum[2],work[2],size; 1662 1663 PetscFunctionBegin; 1664 MPI_Comm_size(comm,&size); 1665 if (size == 1) { 1666 if (M == PETSC_DECIDE) M = m; 1667 if (N == PETSC_DECIDE) N = n; 1668 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 1672 *A = 0; 1673 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1674 PLogObjectCreate(B); 1675 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1676 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1677 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1678 B->ops->destroy = MatDestroy_MPIAIJ; 1679 B->ops->view = MatView_MPIAIJ; 1680 B->factor = 0; 1681 B->assembled = PETSC_FALSE; 1682 B->mapping = 0; 1683 1684 B->insertmode = NOT_SET_VALUES; 1685 b->size = size; 1686 MPI_Comm_rank(comm,&b->rank); 1687 1688 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1689 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1690 } 1691 1692 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1693 work[0] = m; work[1] = n; 1694 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1695 if (M == PETSC_DECIDE) M = sum[0]; 1696 if (N == PETSC_DECIDE) N = sum[1]; 1697 } 1698 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1699 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1700 b->m = m; B->m = m; 1701 b->n = n; B->n = n; 1702 b->N = N; B->N = N; 1703 b->M = M; B->M = M; 1704 1705 /* build local table of row and column ownerships */ 1706 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1707 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1708 b->cowners = b->rowners + b->size + 2; 1709 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1710 b->rowners[0] = 0; 1711 for ( i=2; i<=b->size; i++ ) { 1712 b->rowners[i] += b->rowners[i-1]; 1713 } 1714 b->rstart = b->rowners[b->rank]; 1715 b->rend = b->rowners[b->rank+1]; 1716 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1717 b->cowners[0] = 0; 1718 for ( i=2; i<=b->size; i++ ) { 1719 b->cowners[i] += b->cowners[i-1]; 1720 } 1721 b->cstart = b->cowners[b->rank]; 1722 b->cend = b->cowners[b->rank+1]; 1723 1724 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1725 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1726 PLogObjectParent(B,b->A); 1727 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1728 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1729 PLogObjectParent(B,b->B); 1730 1731 /* build cache for off array entries formed */ 1732 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1733 b->donotstash = 0; 1734 b->colmap = 0; 1735 b->garray = 0; 1736 b->roworiented = 1; 1737 1738 /* stuff used for matrix vector multiply */ 1739 b->lvec = 0; 1740 b->Mvctx = 0; 1741 1742 /* stuff for MatGetRow() */ 1743 b->rowindices = 0; 1744 b->rowvalues = 0; 1745 b->getrowactive = PETSC_FALSE; 1746 1747 *A = B; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 #undef __FUNC__ 1752 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1753 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1754 { 1755 Mat mat; 1756 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1757 int ierr, len=0, flg; 1758 1759 PetscFunctionBegin; 1760 *newmat = 0; 1761 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1762 PLogObjectCreate(mat); 1763 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1764 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1765 mat->ops->destroy = MatDestroy_MPIAIJ; 1766 mat->ops->view = MatView_MPIAIJ; 1767 mat->factor = matin->factor; 1768 mat->assembled = PETSC_TRUE; 1769 1770 a->m = mat->m = oldmat->m; 1771 a->n = mat->n = oldmat->n; 1772 a->M = mat->M = oldmat->M; 1773 a->N = mat->N = oldmat->N; 1774 1775 a->rstart = oldmat->rstart; 1776 a->rend = oldmat->rend; 1777 a->cstart = oldmat->cstart; 1778 a->cend = oldmat->cend; 1779 a->size = oldmat->size; 1780 a->rank = oldmat->rank; 1781 mat->insertmode = NOT_SET_VALUES; 1782 a->rowvalues = 0; 1783 a->getrowactive = PETSC_FALSE; 1784 1785 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1786 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1787 a->cowners = a->rowners + a->size + 2; 1788 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1789 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1790 if (oldmat->colmap) { 1791 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1792 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1793 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1794 } else a->colmap = 0; 1795 if (oldmat->garray) { 1796 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1797 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1798 PLogObjectMemory(mat,len*sizeof(int)); 1799 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1800 } else a->garray = 0; 1801 1802 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1803 PLogObjectParent(mat,a->lvec); 1804 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1805 PLogObjectParent(mat,a->Mvctx); 1806 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1807 PLogObjectParent(mat,a->A); 1808 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1809 PLogObjectParent(mat,a->B); 1810 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1811 if (flg) { 1812 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1813 } 1814 *newmat = mat; 1815 PetscFunctionReturn(0); 1816 } 1817 1818 #include "sys.h" 1819 1820 #undef __FUNC__ 1821 #define __FUNC__ "MatLoad_MPIAIJ" 1822 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1823 { 1824 Mat A; 1825 Scalar *vals,*svals; 1826 MPI_Comm comm = ((PetscObject)viewer)->comm; 1827 MPI_Status status; 1828 int i, nz, ierr, j,rstart, rend, fd; 1829 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1830 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1831 int tag = ((PetscObject)viewer)->tag; 1832 1833 PetscFunctionBegin; 1834 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1835 if (!rank) { 1836 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1837 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1838 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1839 if (header[3] < 0) { 1840 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1841 } 1842 } 1843 1844 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1845 M = header[1]; N = header[2]; 1846 /* determine ownership of all rows */ 1847 m = M/size + ((M % size) > rank); 1848 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1849 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1850 rowners[0] = 0; 1851 for ( i=2; i<=size; i++ ) { 1852 rowners[i] += rowners[i-1]; 1853 } 1854 rstart = rowners[rank]; 1855 rend = rowners[rank+1]; 1856 1857 /* distribute row lengths to all processors */ 1858 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1859 offlens = ourlens + (rend-rstart); 1860 if (!rank) { 1861 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1862 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1863 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1864 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1865 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1866 PetscFree(sndcounts); 1867 } else { 1868 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1869 } 1870 1871 if (!rank) { 1872 /* calculate the number of nonzeros on each processor */ 1873 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1874 PetscMemzero(procsnz,size*sizeof(int)); 1875 for ( i=0; i<size; i++ ) { 1876 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1877 procsnz[i] += rowlengths[j]; 1878 } 1879 } 1880 PetscFree(rowlengths); 1881 1882 /* determine max buffer needed and allocate it */ 1883 maxnz = 0; 1884 for ( i=0; i<size; i++ ) { 1885 maxnz = PetscMax(maxnz,procsnz[i]); 1886 } 1887 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1888 1889 /* read in my part of the matrix column indices */ 1890 nz = procsnz[0]; 1891 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1892 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1893 1894 /* read in every one elses and ship off */ 1895 for ( i=1; i<size; i++ ) { 1896 nz = procsnz[i]; 1897 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1898 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1899 } 1900 PetscFree(cols); 1901 } else { 1902 /* determine buffer space needed for message */ 1903 nz = 0; 1904 for ( i=0; i<m; i++ ) { 1905 nz += ourlens[i]; 1906 } 1907 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1908 1909 /* receive message of column indices*/ 1910 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1911 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1912 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1913 } 1914 1915 /* loop over local rows, determining number of off diagonal entries */ 1916 PetscMemzero(offlens,m*sizeof(int)); 1917 jj = 0; 1918 for ( i=0; i<m; i++ ) { 1919 for ( j=0; j<ourlens[i]; j++ ) { 1920 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1921 jj++; 1922 } 1923 } 1924 1925 /* create our matrix */ 1926 for ( i=0; i<m; i++ ) { 1927 ourlens[i] -= offlens[i]; 1928 } 1929 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1930 A = *newmat; 1931 MatSetOption(A,MAT_COLUMNS_SORTED); 1932 for ( i=0; i<m; i++ ) { 1933 ourlens[i] += offlens[i]; 1934 } 1935 1936 if (!rank) { 1937 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1938 1939 /* read in my part of the matrix numerical values */ 1940 nz = procsnz[0]; 1941 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1942 1943 /* insert into matrix */ 1944 jj = rstart; 1945 smycols = mycols; 1946 svals = vals; 1947 for ( i=0; i<m; i++ ) { 1948 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1949 smycols += ourlens[i]; 1950 svals += ourlens[i]; 1951 jj++; 1952 } 1953 1954 /* read in other processors and ship out */ 1955 for ( i=1; i<size; i++ ) { 1956 nz = procsnz[i]; 1957 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1958 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1959 } 1960 PetscFree(procsnz); 1961 } else { 1962 /* receive numeric values */ 1963 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1964 1965 /* receive message of values*/ 1966 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1967 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1968 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1969 1970 /* insert into matrix */ 1971 jj = rstart; 1972 smycols = mycols; 1973 svals = vals; 1974 for ( i=0; i<m; i++ ) { 1975 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1976 smycols += ourlens[i]; 1977 svals += ourlens[i]; 1978 jj++; 1979 } 1980 } 1981 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1982 1983 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1984 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1985 PetscFunctionReturn(0); 1986 } 1987 1988 #undef __FUNC__ 1989 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1990 /* 1991 Not great since it makes two copies of the submatrix, first an SeqAIJ 1992 in local and then by concatenating the local matrices the end result. 1993 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1994 */ 1995 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1996 { 1997 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1998 Mat *local,M, Mreuse; 1999 Scalar *vwork,*aa; 2000 MPI_Comm comm = mat->comm; 2001 Mat_SeqAIJ *aij; 2002 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2003 2004 PetscFunctionBegin; 2005 MPI_Comm_rank(comm,&rank); 2006 MPI_Comm_size(comm,&size); 2007 2008 if (call == MAT_REUSE_MATRIX) { 2009 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2010 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2011 local = &Mreuse; 2012 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2013 } else { 2014 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2015 Mreuse = *local; 2016 PetscFree(local); 2017 } 2018 2019 /* 2020 m - number of local rows 2021 n - number of columns (same on all processors) 2022 rstart - first row in new global matrix generated 2023 */ 2024 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2025 if (call == MAT_INITIAL_MATRIX) { 2026 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2027 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2028 ii = aij->i; 2029 jj = aij->j; 2030 2031 /* 2032 Determine the number of non-zeros in the diagonal and off-diagonal 2033 portions of the matrix in order to do correct preallocation 2034 */ 2035 2036 /* first get start and end of "diagonal" columns */ 2037 if (csize == PETSC_DECIDE) { 2038 nlocal = n/size + ((n % size) > rank); 2039 } else { 2040 nlocal = csize; 2041 } 2042 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2043 rstart = rend - nlocal; 2044 if (rank == size - 1 && rend != n) { 2045 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2046 } 2047 2048 /* next, compute all the lengths */ 2049 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2050 olens = dlens + m; 2051 for ( i=0; i<m; i++ ) { 2052 jend = ii[i+1] - ii[i]; 2053 olen = 0; 2054 dlen = 0; 2055 for ( j=0; j<jend; j++ ) { 2056 if ( *jj < rstart || *jj >= rend) olen++; 2057 else dlen++; 2058 jj++; 2059 } 2060 olens[i] = olen; 2061 dlens[i] = dlen; 2062 } 2063 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2064 PetscFree(dlens); 2065 } else { 2066 int ml,nl; 2067 2068 M = *newmat; 2069 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2070 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2071 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2072 /* 2073 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2074 rather than the slower MatSetValues(). 2075 */ 2076 M->was_assembled = PETSC_TRUE; 2077 M->assembled = PETSC_FALSE; 2078 } 2079 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2080 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2081 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2082 ii = aij->i; 2083 jj = aij->j; 2084 aa = aij->a; 2085 for (i=0; i<m; i++) { 2086 row = rstart + i; 2087 nz = ii[i+1] - ii[i]; 2088 cwork = jj; jj += nz; 2089 vwork = aa; aa += nz; 2090 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2091 } 2092 2093 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2094 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2095 *newmat = M; 2096 2097 /* save submatrix used in processor for next request */ 2098 if (call == MAT_INITIAL_MATRIX) { 2099 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2100 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2101 } 2102 2103 PetscFunctionReturn(0); 2104 } 2105