#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.268 1998/12/17 22:10:16 bsmith Exp bsmith $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non scable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  /* colmap[] is 1-based: colmap[g] == local_index+1 when global column g is
     present in the off-diagonal block B (garray[] lists those columns), and
     0 (from the memzero) when it is not. */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

/* number of extra nonzero slots added to a row each time it must be enlarged */
#define CHUNKSIZE   15

/*
   Inline insertion of value at (row,col) into the "diagonal" block A of the
   parallel matrix.  Relies on the caller (MatSetValues_MPIAIJ) having the
   work variables rp,ap,rmax,nrow,col1,low,high,t,_i,ii,N,nonew,shift and the
   aliases a,A,aa,ai,aj,aimax,ailen in scope.  If the row is full the row
   storage is reallocated with CHUNKSIZE extra slots.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    /* NOTE(review): the result of this bisection (low) is never used below -- \
       the linear scan restarts at _i=0 -- and it compares rp[t] against col \
       rather than the shifted col1; dead code, left untouched here. */ \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   Same as MatSetValues_SeqAIJ_A_Private() but operates on the off-diagonal
   block B (uses the b,B,ba,bi,bj,bimax,bilen aliases and the b_noinsert label).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

/*
   MatSetValues for the parallel AIJ format.  Locally owned rows are inserted
   directly into the diagonal (A) or off-diagonal (B) sequential block via the
   macros above; rows owned by other processors are stashed and communicated
   during MatAssemblyBegin/End.
*/
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned locally */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block B; once assembled, global
             columns must be translated to local ones through colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr =
 CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            /* col < 0 means this global column is not yet in B; if new
               nonzeros are allowed the matrix must be disassembled back to
               global column numbering before inserting */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      /* row owned by another processor: stash for communication at assembly
         time (unless the user promised not to set off-processor entries) */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
   MatGetValues for the parallel AIJ format.  Only locally owned rows may be
   queried; off-diagonal columns not present in B are returned as 0.0.
*/
#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* entry lives in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          /* column absent from the off-diagonal block => value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

/*
   Begins assembly: posts the non-blocking sends/receives that move the
   stashed off-processor (row, col, value) triples to their owning
   processors.  Each triple is shipped as 3 Scalars (indices are cast to
   Scalar to keep the messages homogeneous).
*/
#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    /* nothing was stashed; record empty communication state */
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  /* after the SUM reduce, work[rank] is the number of messages I will receive;
     after the MAX reduce, work[rank] is the longest message sent to me */
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  /* starts[] was consumed by the packing loop above; rebuild it */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);
  PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* keep the outstanding communication state for MatAssemblyEnd_MPIAIJ() */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

/*
   Completes assembly: drains the receives posted by MatAssemblyBegin_MPIAIJ(),
   inserts the received triples locally, then assembles the two sequential
   blocks and sets up the scatter needed for MatMult.
*/
#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;  /* each entry travels as a (row,col,value) Scalar triple */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr =
DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 440 col = (int) PetscReal(values[3*i+1]); 441 } 442 } 443 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 444 } 445 } 446 count--; 447 } 448 if (aij->recv_waits) PetscFree(aij->recv_waits); 449 if (aij->rvalues) PetscFree(aij->rvalues); 450 451 /* wait on sends */ 452 if (aij->nsends) { 453 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 454 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 455 PetscFree(send_status); 456 } 457 if (aij->send_waits) PetscFree(aij->send_waits); 458 if (aij->svalues) PetscFree(aij->svalues); 459 460 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 461 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 462 463 /* determine if any processor has disassembled, if so we must 464 also disassemble ourselfs, in order that we may reassemble. */ 465 /* 466 if nonzero structure of submatrix B cannot change then we know that 467 no processor disassembled thus we can skip this stuff 468 */ 469 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 470 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 471 if (mat->was_assembled && !other_disassembled) { 472 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 473 } 474 } 475 476 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 477 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 478 } 479 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 480 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 481 482 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 483 PetscFunctionReturn(0); 484 } 485 486 #undef __FUNC__ 487 #define __FUNC__ "MatZeroEntries_MPIAIJ" 488 int MatZeroEntries_MPIAIJ(Mat A) 489 { 490 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 491 int ierr; 492 493 PetscFunctionBegin; 494 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 495 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 496 
PetscFunctionReturn(0); 497 } 498 499 #undef __FUNC__ 500 #define __FUNC__ "MatZeroRows_MPIAIJ" 501 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 502 { 503 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 504 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 505 int *procs,*nprocs,j,found,idx,nsends,*work,row; 506 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 507 int *rvalues,tag = A->tag,count,base,slen,n,*source; 508 int *lens,imdex,*lrows,*values,rstart=l->rstart; 509 MPI_Comm comm = A->comm; 510 MPI_Request *send_waits,*recv_waits; 511 MPI_Status recv_status,*send_status; 512 IS istmp; 513 PetscTruth localdiag; 514 515 PetscFunctionBegin; 516 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 517 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 518 519 /* first count number of contributors to each processor */ 520 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 521 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 522 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 523 for ( i=0; i<N; i++ ) { 524 idx = rows[i]; 525 found = 0; 526 for ( j=0; j<size; j++ ) { 527 if (idx >= owners[j] && idx < owners[j+1]) { 528 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 529 } 530 } 531 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 532 } 533 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 534 535 /* inform other processors of number of messages and max length*/ 536 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 537 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 538 nrecvs = work[rank]; 539 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 540 nmax = work[rank]; 541 PetscFree(work); 542 543 /* post receives: */ 544 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 545 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 546 for ( 
i=0; i<nrecvs; i++ ) { 547 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 548 } 549 550 /* do sends: 551 1) starts[i] gives the starting index in svalues for stuff going to 552 the ith processor 553 */ 554 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 555 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 556 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 557 starts[0] = 0; 558 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 559 for ( i=0; i<N; i++ ) { 560 svalues[starts[owner[i]]++] = rows[i]; 561 } 562 ISRestoreIndices(is,&rows); 563 564 starts[0] = 0; 565 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 566 count = 0; 567 for ( i=0; i<size; i++ ) { 568 if (procs[i]) { 569 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 570 } 571 } 572 PetscFree(starts); 573 574 base = owners[rank]; 575 576 /* wait on receives */ 577 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 578 source = lens + nrecvs; 579 count = nrecvs; slen = 0; 580 while (count) { 581 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 582 /* unpack receives into our local space */ 583 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 584 source[imdex] = recv_status.MPI_SOURCE; 585 lens[imdex] = n; 586 slen += n; 587 count--; 588 } 589 PetscFree(recv_waits); 590 591 /* move the data into the send scatter */ 592 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 593 count = 0; 594 for ( i=0; i<nrecvs; i++ ) { 595 values = rvalues + i*nmax; 596 for ( j=0; j<lens[i]; j++ ) { 597 lrows[count++] = values[j] - base; 598 } 599 } 600 PetscFree(rvalues); PetscFree(lens); 601 PetscFree(owner); PetscFree(nprocs); 602 603 /* actually zap the local rows */ 604 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 605 PLogObjectParent(A,istmp); 606 607 /* 608 Zero the required rows. If the "diagonal block" of the matrix 609 is square and the user wishes to set the diagonal we use seperate 610 code so that MatSetValues() is not called for each diagonal allocating 611 new memory, thus calling lots of mallocs and slowing things down. 612 613 Contributed by: Mathew Knepley 614 */ 615 localdiag = PETSC_FALSE; 616 if (diag && (l->A->M == l->A->N)) { 617 localdiag = PETSC_TRUE; 618 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 619 } else { 620 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 621 } 622 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 623 ierr = ISDestroy(istmp); CHKERRQ(ierr); 624 625 if (diag && (localdiag == PETSC_FALSE)) { 626 for ( i = 0; i < slen; i++ ) { 627 row = lrows[i] + rstart; 628 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 629 } 630 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 631 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 632 } 633 634 if (diag) { 635 for ( i = 0; i < slen; i++ ) { 636 row = lrows[i] + rstart; 637 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 638 } 639 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 640 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 641 } 642 PetscFree(lrows); 643 644 /* wait on sends */ 645 if (nsends) { 646 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 647 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 648 PetscFree(send_status); 649 } 650 PetscFree(send_waits); PetscFree(svalues); 651 652 PetscFunctionReturn(0); 653 } 654 655 #undef __FUNC__ 656 #define __FUNC__ "MatMult_MPIAIJ" 657 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 658 { 659 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 660 int ierr,nt; 661 662 PetscFunctionBegin; 663 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 664 if (nt != a->n) { 665 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 666 } 667 ierr = 
 VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A*xx + yy, overlapping the scatter of xx with the local product */
#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A'*xx: the off-diagonal transpose product is scattered back (ADD)
   into yy in reverse, overlapped with the local transpose product */
#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A'*xx + yy, same communication pattern as MatMultTrans_MPIAIJ() */
#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* scales the whole matrix by *aa (both sequential blocks) */
#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* releases all storage owned by the parallel matrix (reference counted) */
#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

/* binary viewing is only implemented for the uniprocessor case */
#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

/*
   Views the matrix with an ASCII, Draw or Matlab viewer.  For the INFO
   formats only statistics are printed; otherwise the entire parallel matrix
   is gathered onto processor 0 and viewed there.
*/
#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank ;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      /* one processor prints at a time */
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
           rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
           rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ *Aloc;
    int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar      *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert A's column indices to global numbering in place;
       they are restored right after the insertion loop */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* B's local column numbers are translated through garray into a scratch
       array so the matrix itself is not modified */
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* dispatches MatView to the appropriate helper based on the viewer type */
#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,MATLAB_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values betwen colors.
*/
/* NOTE(review): MatRelax_MPIAIJ continues beyond the visible end of this
   chunk; only its opening portion is documented here. */
#undef __FUNC__  
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-processor part contributes nothing, so the
         whole sweep can be delegated to the sequential block's relax */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr =
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 973 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 974 while (its--) { 975 /* go down through the rows */ 976 for ( i=0; i<m; i++ ) { 977 n = A->i[i+1] - A->i[i]; 978 PLogFlops(4*n+3); 979 idx = A->j + A->i[i] + shift; 980 v = A->a + A->i[i] + shift; 981 sum = b[i]; 982 SPARSEDENSEMDOT(sum,xs,v,idx,n); 983 d = fshift + A->a[diag[i]+shift]; 984 n = B->i[i+1] - B->i[i]; 985 idx = B->j + B->i[i] + shift; 986 v = B->a + B->i[i] + shift; 987 SPARSEDENSEMDOT(sum,ls,v,idx,n); 988 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 989 } 990 /* come up through the rows */ 991 for ( i=m-1; i>-1; i-- ) { 992 n = A->i[i+1] - A->i[i]; 993 PLogFlops(4*n+3) 994 idx = A->j + A->i[i] + shift; 995 v = A->a + A->i[i] + shift; 996 sum = b[i]; 997 SPARSEDENSEMDOT(sum,xs,v,idx,n); 998 d = fshift + A->a[diag[i]+shift]; 999 n = B->i[i+1] - B->i[i]; 1000 idx = B->j + B->i[i] + shift; 1001 v = B->a + B->i[i] + shift; 1002 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1003 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1004 } 1005 } 1006 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1007 if (flag & SOR_ZERO_INITIAL_GUESS) { 1008 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1009 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1010 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1011 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1016 while (its--) { 1017 for ( i=0; i<m; i++ ) { 1018 n = A->i[i+1] - A->i[i]; 1019 PLogFlops(4*n+3); 1020 idx = A->j + A->i[i] + shift; 1021 v = A->a + A->i[i] + shift; 1022 sum = b[i]; 1023 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024 d = fshift + A->a[diag[i]+shift]; 1025 n = B->i[i+1] - B->i[i]; 1026 idx = B->j + B->i[i] + shift; 1027 v = B->a + B->i[i] + shift; 1028 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1029 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030 } 1031 } 1032 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1036 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1037 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 while (its--) { 1043 for ( i=m-1; i>-1; i-- ) { 1044 n = A->i[i+1] - A->i[i]; 1045 PLogFlops(4*n+3); 1046 idx = A->j + A->i[i] + shift; 1047 v = A->a + A->i[i] + shift; 1048 sum = b[i]; 1049 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1050 d = fshift + A->a[diag[i]+shift]; 1051 n = B->i[i+1] - B->i[i]; 1052 idx = B->j + B->i[i] + shift; 1053 v = B->a + B->i[i] + shift; 1054 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1055 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1056 } 1057 } 1058 } else { 1059 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1060 } 1061 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1062 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1063 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNC__ 1068 #define __FUNC__ "MatGetInfo_MPIAIJ" 1069 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1070 { 1071 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1072 Mat A = mat->A, B = mat->B; 1073 int ierr; 1074 double isend[5], irecv[5]; 1075 1076 PetscFunctionBegin; 1077 info->block_size = 1.0; 1078 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1079 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1080 isend[3] = info->memory; isend[4] = info->mallocs; 1081 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1082 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1083 isend[3] += info->memory; isend[4] += info->mallocs; 1084 if (flag == MAT_LOCAL) { 1085 info->nz_used = isend[0]; 1086 info->nz_allocated = isend[1]; 1087 info->nz_unneeded = isend[2]; 1088 info->memory = isend[3]; 1089 info->mallocs = isend[4]; 1090 } else if (flag == MAT_GLOBAL_MAX) { 1091 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1092 info->nz_used = irecv[0]; 1093 info->nz_allocated = irecv[1]; 1094 info->nz_unneeded = irecv[2]; 1095 info->memory = irecv[3]; 1096 info->mallocs = irecv[4]; 1097 } else if (flag == MAT_GLOBAL_SUM) { 1098 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1099 info->nz_used = irecv[0]; 1100 info->nz_allocated = irecv[1]; 1101 info->nz_unneeded = irecv[2]; 1102 info->memory = irecv[3]; 1103 info->mallocs = irecv[4]; 1104 } 1105 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1106 info->fill_ratio_needed = 0; 1107 
info->factor_mallocs = 0; 1108 info->rows_global = (double)mat->M; 1109 info->columns_global = (double)mat->N; 1110 info->rows_local = (double)mat->m; 1111 info->columns_local = (double)mat->N; 1112 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNC__ 1117 #define __FUNC__ "MatSetOption_MPIAIJ" 1118 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1119 { 1120 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1121 1122 PetscFunctionBegin; 1123 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1124 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1125 op == MAT_COLUMNS_UNSORTED || 1126 op == MAT_COLUMNS_SORTED || 1127 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1128 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1129 MatSetOption(a->A,op); 1130 MatSetOption(a->B,op); 1131 } else if (op == MAT_ROW_ORIENTED) { 1132 a->roworiented = 1; 1133 MatSetOption(a->A,op); 1134 MatSetOption(a->B,op); 1135 } else if (op == MAT_ROWS_SORTED || 1136 op == MAT_ROWS_UNSORTED || 1137 op == MAT_SYMMETRIC || 1138 op == MAT_STRUCTURALLY_SYMMETRIC || 1139 op == MAT_YES_NEW_DIAGONALS) 1140 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1141 else if (op == MAT_COLUMN_ORIENTED) { 1142 a->roworiented = 0; 1143 MatSetOption(a->A,op); 1144 MatSetOption(a->B,op); 1145 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1146 a->donotstash = 1; 1147 } else if (op == MAT_NO_NEW_DIAGONALS){ 1148 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1149 } else { 1150 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNC__ 1156 #define __FUNC__ "MatGetSize_MPIAIJ" 1157 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1158 { 1159 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1160 1161 PetscFunctionBegin; 1162 if (m) *m = mat->M; 1163 if (n) *n = mat->N; 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNC__ 1168 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1169 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1170 { 1171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1172 1173 
PetscFunctionBegin; 1174 if (m) *m = mat->m; 1175 if (n) *n = mat->n; 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNC__ 1180 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1181 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1182 { 1183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1184 1185 PetscFunctionBegin; 1186 *m = mat->rstart; *n = mat->rend; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1191 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1192 1193 #undef __FUNC__ 1194 #define __FUNC__ "MatGetRow_MPIAIJ" 1195 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1196 { 1197 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1198 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1199 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1200 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1201 int *cmap, *idx_p; 1202 1203 PetscFunctionBegin; 1204 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1205 mat->getrowactive = PETSC_TRUE; 1206 1207 if (!mat->rowvalues && (idx || v)) { 1208 /* 1209 allocate enough space to hold information from the longest row. 
1210 */ 1211 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1212 int max = 1,m = mat->m,tmp; 1213 for ( i=0; i<m; i++ ) { 1214 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1215 if (max < tmp) { max = tmp; } 1216 } 1217 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1218 mat->rowindices = (int *) (mat->rowvalues + max); 1219 } 1220 1221 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1222 lrow = row - rstart; 1223 1224 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1225 if (!v) {pvA = 0; pvB = 0;} 1226 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1227 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1228 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1229 nztot = nzA + nzB; 1230 1231 cmap = mat->garray; 1232 if (v || idx) { 1233 if (nztot) { 1234 /* Sort by increasing column numbers, assuming A and B already sorted */ 1235 int imark = -1; 1236 if (v) { 1237 *v = v_p = mat->rowvalues; 1238 for ( i=0; i<nzB; i++ ) { 1239 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1240 else break; 1241 } 1242 imark = i; 1243 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1244 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1245 } 1246 if (idx) { 1247 *idx = idx_p = mat->rowindices; 1248 if (imark > -1) { 1249 for ( i=0; i<imark; i++ ) { 1250 idx_p[i] = cmap[cworkB[i]]; 1251 } 1252 } else { 1253 for ( i=0; i<nzB; i++ ) { 1254 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1255 else break; 1256 } 1257 imark = i; 1258 } 1259 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1260 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1261 } 1262 } else { 1263 if (idx) *idx = 0; 1264 if (v) *v = 0; 1265 } 1266 } 1267 *nz = nztot; 1268 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1269 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNC__ 1274 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1275 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1278 1279 PetscFunctionBegin; 1280 if (aij->getrowactive == PETSC_FALSE) { 1281 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1282 } 1283 aij->getrowactive = PETSC_FALSE; 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNC__ 1288 #define __FUNC__ "MatNorm_MPIAIJ" 1289 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1292 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1293 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1294 double sum = 0.0; 1295 Scalar *v; 1296 1297 PetscFunctionBegin; 1298 if (aij->size == 1) { 1299 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1300 } else { 1301 if (type == NORM_FROBENIUS) { 1302 v = amat->a; 1303 for (i=0; i<amat->nz; i++ ) { 1304 #if defined(USE_PETSC_COMPLEX) 1305 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1306 #else 1307 sum += (*v)*(*v); v++; 1308 #endif 1309 } 1310 v = bmat->a; 1311 for (i=0; i<bmat->nz; i++ ) { 1312 #if defined(USE_PETSC_COMPLEX) 1313 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1314 #else 1315 sum += (*v)*(*v); v++; 1316 #endif 1317 } 1318 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1319 *norm = sqrt(*norm); 1320 } else if (type == NORM_1) { /* max column norm */ 1321 double *tmp, *tmp2; 1322 int *jj, *garray = aij->garray; 1323 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1324 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1325 PetscMemzero(tmp,aij->N*sizeof(double)); 1326 *norm = 0.0; 1327 v = amat->a; jj = amat->j; 1328 for ( j=0; j<amat->nz; j++ ) { 1329 tmp[cstart + *jj++ + 
shift] += PetscAbsScalar(*v); v++; 1330 } 1331 v = bmat->a; jj = bmat->j; 1332 for ( j=0; j<bmat->nz; j++ ) { 1333 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1334 } 1335 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1336 for ( j=0; j<aij->N; j++ ) { 1337 if (tmp2[j] > *norm) *norm = tmp2[j]; 1338 } 1339 PetscFree(tmp); PetscFree(tmp2); 1340 } else if (type == NORM_INFINITY) { /* max row norm */ 1341 double ntemp = 0.0; 1342 for ( j=0; j<amat->m; j++ ) { 1343 v = amat->a + amat->i[j] + shift; 1344 sum = 0.0; 1345 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1346 sum += PetscAbsScalar(*v); v++; 1347 } 1348 v = bmat->a + bmat->i[j] + shift; 1349 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1350 sum += PetscAbsScalar(*v); v++; 1351 } 1352 if (sum > ntemp) ntemp = sum; 1353 } 1354 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1355 } else { 1356 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1357 } 1358 } 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNC__ 1363 #define __FUNC__ "MatTranspose_MPIAIJ" 1364 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1365 { 1366 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1367 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1368 int ierr,shift = Aloc->indexshift; 1369 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1370 Mat B; 1371 Scalar *array; 1372 1373 PetscFunctionBegin; 1374 if (matout == PETSC_NULL && M != N) { 1375 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1376 } 1377 1378 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1379 1380 /* copy over the A part */ 1381 Aloc = (Mat_SeqAIJ*) a->A->data; 1382 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1383 row = a->rstart; 1384 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1385 for ( i=0; i<m; i++ ) { 1386 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1387 row++; array += 
ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1388 } 1389 aj = Aloc->j; 1390 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1391 1392 /* copy over the B part */ 1393 Aloc = (Mat_SeqAIJ*) a->B->data; 1394 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1395 row = a->rstart; 1396 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1397 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1398 for ( i=0; i<m; i++ ) { 1399 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1401 } 1402 PetscFree(ct); 1403 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1404 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1405 if (matout != PETSC_NULL) { 1406 *matout = B; 1407 } else { 1408 PetscOps *Abops; 1409 struct _MatOps *Aops; 1410 1411 /* This isn't really an in-place transpose .... but free data structures from a */ 1412 PetscFree(a->rowners); 1413 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1414 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1415 if (a->colmap) PetscFree(a->colmap); 1416 if (a->garray) PetscFree(a->garray); 1417 if (a->lvec) VecDestroy(a->lvec); 1418 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1419 PetscFree(a); 1420 1421 /* 1422 This is horrible, horrible code. We need to keep the 1423 A pointers for the bops and ops but copy everything 1424 else from C. 
1425 */ 1426 Abops = A->bops; 1427 Aops = A->ops; 1428 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1429 A->bops = Abops; 1430 A->ops = Aops; 1431 PetscHeaderDestroy(B); 1432 } 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1438 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1441 Mat a = aij->A, b = aij->B; 1442 int ierr,s1,s2,s3; 1443 1444 PetscFunctionBegin; 1445 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1446 if (rr) { 1447 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1448 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1449 /* Overlap communication with computation. */ 1450 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1451 } 1452 if (ll) { 1453 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1454 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1455 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1456 } 1457 /* scale the diagonal block */ 1458 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1459 1460 if (rr) { 1461 /* Do a scatter end and then right scale the off-diagonal block */ 1462 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1463 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1464 } 1465 1466 PetscFunctionReturn(0); 1467 } 1468 1469 1470 extern int MatPrintHelp_SeqAIJ(Mat); 1471 #undef __FUNC__ 1472 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1473 int MatPrintHelp_MPIAIJ(Mat A) 1474 { 1475 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1476 int ierr; 1477 1478 PetscFunctionBegin; 1479 if (!a->rank) { 1480 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNC__ 1486 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1487 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1488 { 1489 PetscFunctionBegin; 1490 *bs 
= 1; 1491 PetscFunctionReturn(0); 1492 } 1493 #undef __FUNC__ 1494 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1495 int MatSetUnfactored_MPIAIJ(Mat A) 1496 { 1497 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1498 int ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNC__ 1506 #define __FUNC__ "MatEqual_MPIAIJ" 1507 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1508 { 1509 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1510 Mat a, b, c, d; 1511 PetscTruth flg; 1512 int ierr; 1513 1514 PetscFunctionBegin; 1515 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1516 a = matA->A; b = matA->B; 1517 c = matB->A; d = matB->B; 1518 1519 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1520 if (flg == PETSC_TRUE) { 1521 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1522 } 1523 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNC__ 1528 #define __FUNC__ "MatCopy_MPIAIJ" 1529 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1530 { 1531 int ierr; 1532 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1533 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1534 1535 PetscFunctionBegin; 1536 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1537 /* because of the column compression in the off-processor part of the matrix a->B, 1538 the number of columns in a->B and b->B may be different, hence we cannot call 1539 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1540 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1541 then copying the submatrices */ 1542 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1545 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1551 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1552 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1553 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1554 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1555 1556 /* -------------------------------------------------------------------*/ 1557 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1558 MatGetRow_MPIAIJ, 1559 MatRestoreRow_MPIAIJ, 1560 MatMult_MPIAIJ, 1561 MatMultAdd_MPIAIJ, 1562 MatMultTrans_MPIAIJ, 1563 MatMultTransAdd_MPIAIJ, 1564 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 0, 1570 MatRelax_MPIAIJ, 1571 MatTranspose_MPIAIJ, 1572 MatGetInfo_MPIAIJ, 1573 MatEqual_MPIAIJ, 1574 MatGetDiagonal_MPIAIJ, 1575 MatDiagonalScale_MPIAIJ, 1576 MatNorm_MPIAIJ, 1577 MatAssemblyBegin_MPIAIJ, 1578 MatAssemblyEnd_MPIAIJ, 1579 0, 1580 MatSetOption_MPIAIJ, 1581 MatZeroEntries_MPIAIJ, 1582 MatZeroRows_MPIAIJ, 1583 0, 1584 0, 1585 0, 1586 0, 1587 MatGetSize_MPIAIJ, 1588 MatGetLocalSize_MPIAIJ, 1589 MatGetOwnershipRange_MPIAIJ, 1590 0, 1591 0, 1592 0, 1593 0, 1594 MatDuplicate_MPIAIJ, 1595 0, 1596 0, 1597 0, 1598 0, 1599 0, 1600 MatGetSubMatrices_MPIAIJ, 1601 MatIncreaseOverlap_MPIAIJ, 1602 MatGetValues_MPIAIJ, 1603 MatCopy_MPIAIJ, 1604 MatPrintHelp_MPIAIJ, 1605 MatScale_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 MatGetBlockSize_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 0, 1614 MatFDColoringCreate_MPIAIJ, 1615 0, 1616 MatSetUnfactored_MPIAIJ, 1617 0, 1618 0, 1619 MatGetSubMatrix_MPIAIJ, 1620 0, 
1621 0, 1622 MatGetMaps_Petsc}; 1623 1624 /* ----------------------------------------------------------------------------------------*/ 1625 1626 EXTERN_C_BEGIN 1627 #undef __FUNC__ 1628 #define __FUNC__ "MatStoreValues_MPIAIJ" 1629 int MatStoreValues_MPIAIJ(Mat mat) 1630 { 1631 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1632 int ierr; 1633 1634 PetscFunctionBegin; 1635 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1636 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 EXTERN_C_END 1640 1641 EXTERN_C_BEGIN 1642 #undef __FUNC__ 1643 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1644 int MatRetrieveValues_MPIAIJ(Mat mat) 1645 { 1646 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1647 int ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1651 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 EXTERN_C_END 1655 1656 #undef __FUNC__ 1657 #define __FUNC__ "MatCreateMPIAIJ" 1658 /*@C 1659 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1660 (the default parallel PETSc format). For good matrix assembly performance 1661 the user should preallocate the matrix storage by setting the parameters 1662 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1663 performance can be increased by more than a factor of 50. 1664 1665 Collective on MPI_Comm 1666 1667 Input Parameters: 1668 + comm - MPI communicator 1669 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1670 This value should be the same as the local size used in creating the 1671 y vector for the matrix-vector product y = Ax. 1672 . n - This value should be the same as the local size used in creating the 1673 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1674 calculated if N is given) For square matrices n is almost always m. 1675 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1676 . 
N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi (or -mat_mpiaij) - use the parallel matrix data structures even on
        one processor (defaults to using SeqAIJ format on one processor)


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* on one process fall back to a sequential AIJ matrix unless the user
     explicitly asked for the parallel data structures */
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  MPI_Comm_rank(comm,&b->rank);

  /* per-row preallocation arrays are meaningless without a known local row count */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* distribute remainder rows/columns to the lowest-ranked processes */
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships (prefix sums of the
     gathered per-process sizes; rowners[rank]..rowners[rank+1] is our range) */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
PLogObjectParent(B,b->B); 1894 1895 /* build cache for off array entries formed */ 1896 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1897 b->donotstash = 0; 1898 b->colmap = 0; 1899 b->garray = 0; 1900 b->roworiented = 1; 1901 1902 /* stuff used for matrix vector multiply */ 1903 b->lvec = 0; 1904 b->Mvctx = 0; 1905 1906 /* stuff for MatGetRow() */ 1907 b->rowindices = 0; 1908 b->rowvalues = 0; 1909 b->getrowactive = PETSC_FALSE; 1910 1911 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1912 "MatStoreValues_MPIAIJ", 1913 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1914 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1915 "MatRetrieveValues_MPIAIJ", 1916 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1917 *A = B; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #undef __FUNC__ 1922 #define __FUNC__ "MatDuplicate_MPIAIJ" 1923 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1924 { 1925 Mat mat; 1926 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1927 int ierr, len=0, flg; 1928 1929 PetscFunctionBegin; 1930 *newmat = 0; 1931 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1932 PLogObjectCreate(mat); 1933 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1934 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1935 mat->ops->destroy = MatDestroy_MPIAIJ; 1936 mat->ops->view = MatView_MPIAIJ; 1937 mat->factor = matin->factor; 1938 mat->assembled = PETSC_TRUE; 1939 1940 a->m = mat->m = oldmat->m; 1941 a->n = mat->n = oldmat->n; 1942 a->M = mat->M = oldmat->M; 1943 a->N = mat->N = oldmat->N; 1944 1945 a->rstart = oldmat->rstart; 1946 a->rend = oldmat->rend; 1947 a->cstart = oldmat->cstart; 1948 a->cend = oldmat->cend; 1949 a->size = oldmat->size; 1950 a->rank = oldmat->rank; 1951 mat->insertmode = NOT_SET_VALUES; 1952 a->rowvalues = 0; 1953 a->getrowactive = PETSC_FALSE; 1954 1955 a->rowners = (int *) 
PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* colmap/garray (global-to-local column maps) are only copied if the
     original matrix had built them */
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* duplicate the scatter context and work vector used in MatMult */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
    Loads an MPIAIJ matrix from a PETSc binary viewer.  Process 0 reads the
  header, the row lengths, the column indices, and the numerical values, and
  ships each processor its share with MPI_Send/MPI_Recv; every processor then
  computes exact diagonal/off-diagonal preallocation before the matrix is
  created, filled with MatSetValues(), and assembled.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only process 0 touches the file; everyone else gets data via MPI */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows (same even split as MatCreateMPIAIJ) */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix, preallocated exactly: ourlens is temporarily reduced
     to the diagonal-block counts, then restored to the full row lengths */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix, one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix, one local row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

  csize is the number of columns of the result owned by this processor
  (PETSC_DECIDE splits them evenly); with MAT_REUSE_MATRIX the sequential
  submatrix saved on a previous MAT_INITIAL_MATRIX call is reused.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix stashed on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* only the last rank sees the final prefix sum, so it checks the total */
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; olens shares the dlens allocation */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy the local SeqAIJ submatrix into the parallel matrix row by row,
     walking the CSR arrays directly */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next (MAT_REUSE_MATRIX) request;
     the dereference hands ownership of Mreuse to M */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}