#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.234 1998/04/03 23:15:06 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 
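  /*
     Loop over the rows being set:
       - rows owned by this process ([rstart,rend)) are inserted directly:
         into the diagonal block A when the column lies in [cstart,cend),
         and into the off-diagonal block B otherwise (off-diagonal columns
         are translated to local indices through colmap once the matrix
         has been assembled);
       - rows owned by other processes are stashed and communicated during
         MatAssemblyBegin()/MatAssemblyEnd(), unless donotstash is set.
  */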
for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } 238 else col = in[j]; 239 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 240 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 241 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 242 } 243 } 244 } 245 else { 246 if (roworiented && !aij->donotstash) { 247 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 248 } 249 else { 250 if (!aij->donotstash) { 251 row = im[i]; 252 for ( j=0; j<n; j++ ) { 253 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 254 } 255 } 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatGetValues_MPIAIJ" 264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265 { 266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268 int cstart = aij->cstart, cend = aij->cend,row,col; 269 270 PetscFunctionBegin; 271 for ( i=0; i<m; i++ ) { 272 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 273 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 274 if (idxm[i] >= rstart && idxm[i] < rend) { 275 row = idxm[i] - rstart; 276 for ( j=0; j<n; j++ ) { 277 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 278 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 279 if (idxn[j] >= cstart && idxn[j] < cend){ 280 col = idxn[j] - cstart; 281 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 282 } 283 else { 284 if (!aij->colmap) { 285 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 286 } 287 col = aij->colmap[idxn[j]] - 1; 288 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 289 else { 290 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } 292 } 293 } 294 } else { 295 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 296 } 297 } 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = 
aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 PetscFunctionBegin; 315 /* make sure all processors are either in INSERTMODE or ADDMODE */ 316 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 317 if (addv == (ADD_VALUES|INSERT_VALUES)) { 318 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 319 } 320 mat->insertmode = addv; /* in case this processor had no cache */ 321 322 /* first count number of contributors to each processor */ 323 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 324 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 325 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 326 for ( i=0; i<aij->stash.n; i++ ) { 327 idx = aij->stash.idx[i]; 328 for ( j=0; j<size; j++ ) { 329 if (idx >= owners[j] && idx < owners[j+1]) { 330 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 331 } 332 } 333 } 334 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 335 336 /* inform other processors of number of messages and max length*/ 337 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 338 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 339 nreceives = work[rank]; 340 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 341 nmax = work[rank]; 342 PetscFree(work); 343 344 /* post receives: 345 1) each message will consist of ordered pairs 346 (global index,value) we store the global index as a double 347 to simplify the message passing. 348 2) since we don't know how long each individual message is we 349 allocate the largest needed buffer for each receive. Potentially 350 this is a lot of wasted space. 351 352 353 This could be done better. 
354 */ 355 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 357 for ( i=0; i<nreceives; i++ ) { 358 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 359 comm,recv_waits+i);CHKERRQ(ierr); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 368 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 369 starts[0] = 0; 370 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 371 for ( i=0; i<aij->stash.n; i++ ) { 372 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 373 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 374 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 375 } 376 PetscFree(owner); 377 starts[0] = 0; 378 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 PetscFunctionReturn(0); 397 } 398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407 int row,col,other_disassembled; 408 Scalar *values,val; 409 InsertMode addv = mat->insertmode; 410 411 PetscFunctionBegin; 412 /* wait on receives */ 413 while (count) { 414 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 415 /* unpack receives into our local space */ 416 values = aij->rvalues + 3*imdex*aij->rmax; 417 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 418 n = n/3; 419 for ( i=0; i<n; i++ ) { 420 row = (int) PetscReal(values[3*i]) - aij->rstart; 421 col = (int) PetscReal(values[3*i+1]); 422 val = values[3*i+2]; 423 if (col >= aij->cstart && col < aij->cend) { 424 col -= aij->cstart; 425 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 426 } else { 427 if (mat->was_assembled || mat->assembled) { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430 } 431 col = aij->colmap[col] - 1; 432 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 433 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434 col = (int) PetscReal(values[3*i+1]); 435 } 436 } 437 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 438 } 439 } 440 count--; 441 } 442 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 443 444 /* wait on sends */ 445 if (aij->nsends) { 446 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447 ierr = 
MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 448 PetscFree(send_status); 449 } 450 PetscFree(aij->send_waits); PetscFree(aij->svalues); 451 452 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 454 455 /* determine if any processor has disassembled, if so we must 456 also disassemble ourselfs, in order that we may reassemble. */ 457 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 458 if (mat->was_assembled && !other_disassembled) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 } 461 462 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 463 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 467 468 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNC__ 473 #define __FUNC__ "MatZeroEntries_MPIAIJ" 474 int MatZeroEntries_MPIAIJ(Mat A) 475 { 476 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 479 PetscFunctionBegin; 480 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 481 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNC__ 486 #define __FUNC__ "MatZeroRows_MPIAIJ" 487 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 488 { 489 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 490 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 491 int *procs,*nprocs,j,found,idx,nsends,*work,row; 492 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 493 int *rvalues,tag = A->tag,count,base,slen,n,*source; 494 int *lens,imdex,*lrows,*values,rstart=l->rstart; 495 MPI_Comm comm = A->comm; 496 MPI_Request *send_waits,*recv_waits; 497 MPI_Status recv_status,*send_status; 498 IS istmp; 499 500 PetscFunctionBegin; 501 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 502 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 503 504 /* first count number of contributors to each processor */ 505 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 506 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 507 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 508 for ( i=0; i<N; i++ ) { 509 idx = rows[i]; 510 found = 0; 511 for ( j=0; j<size; j++ ) { 512 if (idx >= owners[j] && idx < owners[j+1]) { 513 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 514 } 515 } 516 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 517 } 518 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 519 520 /* inform other processors of number of messages and max length*/ 521 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 522 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 523 nrecvs = work[rank]; 524 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 525 nmax = work[rank]; 526 PetscFree(work); 527 528 /* post receives: */ 529 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 530 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 531 for ( i=0; i<nrecvs; i++ ) { 532 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 533 } 534 535 /* do sends: 536 1) starts[i] gives the starting index in svalues for stuff going to 537 the ith processor 538 */ 539 svalues = (int *) PetscMalloc( 
(N+1)*sizeof(int) ); CHKPTRQ(svalues); 540 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 541 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 542 starts[0] = 0; 543 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 544 for ( i=0; i<N; i++ ) { 545 svalues[starts[owner[i]]++] = rows[i]; 546 } 547 ISRestoreIndices(is,&rows); 548 549 starts[0] = 0; 550 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 551 count = 0; 552 for ( i=0; i<size; i++ ) { 553 if (procs[i]) { 554 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 555 } 556 } 557 PetscFree(starts); 558 559 base = owners[rank]; 560 561 /* wait on receives */ 562 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 563 source = lens + nrecvs; 564 count = nrecvs; slen = 0; 565 while (count) { 566 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 567 /* unpack receives into our local space */ 568 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 569 source[imdex] = recv_status.MPI_SOURCE; 570 lens[imdex] = n; 571 slen += n; 572 count--; 573 } 574 PetscFree(recv_waits); 575 576 /* move the data into the send scatter */ 577 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 578 count = 0; 579 for ( i=0; i<nrecvs; i++ ) { 580 values = rvalues + i*nmax; 581 for ( j=0; j<lens[i]; j++ ) { 582 lrows[count++] = values[j] - base; 583 } 584 } 585 PetscFree(rvalues); PetscFree(lens); 586 PetscFree(owner); PetscFree(nprocs); 587 588 /* actually zap the local rows */ 589 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 590 PLogObjectParent(A,istmp); 591 592 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 593 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 594 ierr = ISDestroy(istmp); CHKERRQ(ierr); 595 596 if (diag) { 597 for ( i = 0; i < slen; i++ ) { 598 row = lrows[i] + rstart; 599 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 600 } 601 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 602 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 603 } 604 PetscFree(lrows); 605 /* wait on sends */ 606 if (nsends) { 607 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 608 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 609 PetscFree(send_status); 610 } 611 PetscFree(send_waits); PetscFree(svalues); 612 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNC__ 617 #define __FUNC__ "MatMult_MPIAIJ" 618 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 619 { 620 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 621 int ierr,nt; 622 623 PetscFunctionBegin; 624 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625 if (nt != a->n) { 626 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 627 } 628 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 629 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 630 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 631 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatMultAdd_MPIAIJ" 637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638 { 639 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640 int ierr; 641 642 PetscFunctionBegin; 643 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 644 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); 
CHKERRQ(ierr); 645 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 646 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatMultTrans_MPIAIJ" 652 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 653 { 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 int ierr; 656 657 PetscFunctionBegin; 658 /* do nondiagonal part */ 659 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 660 /* send it on its way */ 661 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 662 /* do local part */ 663 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 664 /* receive remote parts: note this assumes the values are not actually */ 665 /* inserted in yy until the next line, which is true for my implementation*/ 666 /* but is not perhaps always true. */ 667 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNC__ 672 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 673 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 674 { 675 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676 int ierr; 677 678 PetscFunctionBegin; 679 /* do nondiagonal part */ 680 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 681 /* send it on its way */ 682 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 683 /* do local part */ 684 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 685 /* receive remote parts: note this assumes the values are not actually */ 686 /* inserted in yy until the next line, which is true for my implementation*/ 687 /* but is not perhaps always true. */ 688 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 /* 693 This only works correctly for square matrices where the subblock A->A is the 694 diagonal block 695 */ 696 #undef __FUNC__ 697 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 698 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 699 { 700 int ierr; 701 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 702 703 PetscFunctionBegin; 704 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 705 if (a->rstart != a->cstart || a->rend != a->cend) { 706 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 707 } 708 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatScale_MPIAIJ" 714 int MatScale_MPIAIJ(Scalar *aa,Mat A) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 717 int ierr; 718 719 PetscFunctionBegin; 720 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 721 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNC__ 726 #define __FUNC__ "MatDestroy_MPIAIJ" 727 int MatDestroy_MPIAIJ(Mat mat) 728 { 729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 730 int ierr; 731 732 PetscFunctionBegin; 733 #if defined(USE_PETSC_LOG) 734 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 735 #endif 736 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 737 PetscFree(aij->rowners); 738 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 739 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 740 if (aij->colmap) PetscFree(aij->colmap); 741 if (aij->garray) PetscFree(aij->garray); 742 if (aij->lvec) VecDestroy(aij->lvec); 743 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 744 if 
(aij->rowvalues) PetscFree(aij->rowvalues); 745 PetscFree(aij); 746 PLogObjectDestroy(mat); 747 PetscHeaderDestroy(mat); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNC__ 752 #define __FUNC__ "MatView_MPIAIJ_Binary" 753 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 756 int ierr; 757 758 PetscFunctionBegin; 759 if (aij->size == 1) { 760 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 761 } 762 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNC__ 767 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 768 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 769 { 770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 771 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 772 int ierr, format,shift = C->indexshift,rank; 773 FILE *fd; 774 ViewerType vtype; 775 776 PetscFunctionBegin; 777 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 778 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 779 ierr = ViewerGetFormat(viewer,&format); 780 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 781 MatInfo info; 782 int flg; 783 MPI_Comm_rank(mat->comm,&rank); 784 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 785 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 786 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 787 PetscSequentialPhaseBegin(mat->comm,1); 788 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 789 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 790 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 791 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 792 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 793 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 794 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 795 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 796 fflush(fd); 797 PetscSequentialPhaseEnd(mat->comm,1); 798 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 801 PetscFunctionReturn(0); 802 } 803 } 804 805 if (vtype == DRAW_VIEWER) { 806 Draw draw; 807 PetscTruth isnull; 808 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 809 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 810 } 811 812 if (vtype == ASCII_FILE_VIEWER) { 813 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 814 PetscSequentialPhaseBegin(mat->comm,1); 815 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 816 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 817 aij->cend); 818 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 819 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 820 fflush(fd); 821 PetscSequentialPhaseEnd(mat->comm,1); 822 } else { 823 int size = aij->size; 824 rank = aij->rank; 825 if (size == 1) { 826 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 827 } else { 828 /* assemble the entire matrix onto first processor. 
      */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
905 */ 906 #undef __FUNC__ 907 #define __FUNC__ "MatRelax_MPIAIJ" 908 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 909 double fshift,int its,Vec xx) 910 { 911 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 912 Mat AA = mat->A, BB = mat->B; 913 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 914 Scalar *b,*x,*xs,*ls,d,*v,sum; 915 int ierr,*idx, *diag; 916 int n = mat->n, m = mat->m, i,shift = A->indexshift; 917 918 PetscFunctionBegin; 919 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 920 xs = x + shift; /* shift by one for index start of 1 */ 921 ls = ls + shift; 922 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 923 diag = A->diag; 924 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 925 if (flag & SOR_ZERO_INITIAL_GUESS) { 926 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } 929 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 930 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 931 while (its--) { 932 /* go down through the rows */ 933 for ( i=0; i<m; i++ ) { 934 n = A->i[i+1] - A->i[i]; 935 idx = A->j + A->i[i] + shift; 936 v = A->a + A->i[i] + shift; 937 sum = b[i]; 938 SPARSEDENSEMDOT(sum,xs,v,idx,n); 939 d = fshift + A->a[diag[i]+shift]; 940 n = B->i[i+1] - B->i[i]; 941 idx = B->j + B->i[i] + shift; 942 v = B->a + B->i[i] + shift; 943 SPARSEDENSEMDOT(sum,ls,v,idx,n); 944 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 945 } 946 /* come up through the rows */ 947 for ( i=m-1; i>-1; i-- ) { 948 n = A->i[i+1] - A->i[i]; 949 idx = A->j + A->i[i] + shift; 950 v = A->a + A->i[i] + shift; 951 sum = b[i]; 952 SPARSEDENSEMDOT(sum,xs,v,idx,n); 953 d = fshift + A->a[diag[i]+shift]; 954 n = B->i[i+1] - B->i[i]; 955 idx = B->j + B->i[i] + shift; 956 v = B->a + B->i[i] + shift; 957 SPARSEDENSEMDOT(sum,ls,v,idx,n); 958 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 959 } 960 } 961 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 962 if (flag & SOR_ZERO_INITIAL_GUESS) { 963 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } 966 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 967 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 968 while (its--) { 969 for ( i=0; i<m; i++ ) { 970 n = A->i[i+1] - A->i[i]; 971 idx = A->j + A->i[i] + shift; 972 v = A->a + A->i[i] + shift; 973 sum = b[i]; 974 SPARSEDENSEMDOT(sum,xs,v,idx,n); 975 d = fshift + A->a[diag[i]+shift]; 976 n = B->i[i+1] - B->i[i]; 977 idx = B->j + B->i[i] + shift; 978 v = B->a + B->i[i] + shift; 979 SPARSEDENSEMDOT(sum,ls,v,idx,n); 980 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 981 } 982 } 983 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 984 if (flag & SOR_ZERO_INITIAL_GUESS) { 985 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 989 mat->Mvctx); CHKERRQ(ierr); 990 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 991 mat->Mvctx); CHKERRQ(ierr); 992 while (its--) { 993 for ( i=m-1; i>-1; i-- ) { 994 n = A->i[i+1] - A->i[i]; 995 idx = A->j + A->i[i] + shift; 996 v = A->a + A->i[i] + shift; 997 sum = b[i]; 998 SPARSEDENSEMDOT(sum,xs,v,idx,n); 999 d = fshift + A->a[diag[i]+shift]; 1000 n = B->i[i+1] - B->i[i]; 1001 idx = B->j + B->i[i] + shift; 1002 v = B->a + B->i[i] + shift; 1003 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1004 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1005 } 1006 } 1007 } else { 1008 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNC__ 1014 #define __FUNC__ "MatGetInfo_MPIAIJ" 1015 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1016 { 1017 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1018 Mat A = mat->A, B = mat->B; 1019 int ierr; 1020 double isend[5], irecv[5]; 1021 1022 PetscFunctionBegin; 1023 info->block_size = 1.0; 1024 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1025 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1026 isend[3] = info->memory; isend[4] = info->mallocs; 1027 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1028 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1029 isend[3] += info->memory; isend[4] += info->mallocs; 1030 if (flag == MAT_LOCAL) { 1031 info->nz_used = isend[0]; 1032 info->nz_allocated = isend[1]; 1033 info->nz_unneeded = isend[2]; 1034 info->memory = isend[3]; 1035 info->mallocs = isend[4]; 1036 } else if (flag == MAT_GLOBAL_MAX) { 1037 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1038 info->nz_used = irecv[0]; 1039 info->nz_allocated = irecv[1]; 1040 info->nz_unneeded = irecv[2]; 1041 info->memory = irecv[3]; 1042 info->mallocs = irecv[4]; 1043 } else if (flag == MAT_GLOBAL_SUM) { 1044 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1045 info->nz_used = irecv[0]; 1046 info->nz_allocated = irecv[1]; 1047 info->nz_unneeded = irecv[2]; 1048 info->memory = irecv[3]; 1049 info->mallocs = irecv[4]; 1050 } 1051 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1052 info->fill_ratio_needed = 0; 1053 info->factor_mallocs = 0; 1054 info->rows_global = (double)mat->M; 1055 info->columns_global = (double)mat->N; 1056 info->rows_local = (double)mat->m; 1057 info->columns_local = (double)mat->N; 1058 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNC__ 1063 #define __FUNC__ "MatSetOption_MPIAIJ" 1064 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1067 1068 PetscFunctionBegin; 1069 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1070 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1071 op == MAT_COLUMNS_UNSORTED || 1072 op == MAT_COLUMNS_SORTED || 1073 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1074 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1075 MatSetOption(a->A,op); 1076 MatSetOption(a->B,op); 1077 } else if (op == MAT_ROW_ORIENTED) { 1078 a->roworiented = 1; 1079 MatSetOption(a->A,op); 1080 
MatSetOption(a->B,op); 1081 } else if (op == MAT_ROWS_SORTED || 1082 op == MAT_ROWS_UNSORTED || 1083 op == MAT_SYMMETRIC || 1084 op == MAT_STRUCTURALLY_SYMMETRIC || 1085 op == MAT_YES_NEW_DIAGONALS) 1086 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1087 else if (op == MAT_COLUMN_ORIENTED) { 1088 a->roworiented = 0; 1089 MatSetOption(a->A,op); 1090 MatSetOption(a->B,op); 1091 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1092 a->donotstash = 1; 1093 } else if (op == MAT_NO_NEW_DIAGONALS){ 1094 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1095 } else { 1096 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1097 } 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNC__ 1102 #define __FUNC__ "MatGetSize_MPIAIJ" 1103 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1104 { 1105 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1106 1107 PetscFunctionBegin; 1108 if (m) *m = mat->M; 1109 if (n) *n = mat->N; 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNC__ 1114 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1115 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1116 { 1117 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1118 1119 PetscFunctionBegin; 1120 if (m) *m = mat->m; 1121 if (n) *n = mat->n; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNC__ 1126 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1127 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1128 { 1129 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1130 1131 PetscFunctionBegin; 1132 *m = mat->rstart; *n = mat->rend; 1133 PetscFunctionReturn(0); 1134 } 1135 1136 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1137 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1138 1139 #undef __FUNC__ 1140 #define __FUNC__ "MatGetRow_MPIAIJ" 1141 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1142 { 1143 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1144 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1145 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1146 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1147 int *cmap, *idx_p; 1148 1149 PetscFunctionBegin; 1150 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1151 mat->getrowactive = PETSC_TRUE; 1152 1153 if (!mat->rowvalues && (idx || v)) { 1154 /* 1155 allocate enough space to hold information from the longest row. 
1156 */ 1157 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1158 int max = 1,m = mat->m,tmp; 1159 for ( i=0; i<m; i++ ) { 1160 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1161 if (max < tmp) { max = tmp; } 1162 } 1163 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1164 CHKPTRQ(mat->rowvalues); 1165 mat->rowindices = (int *) (mat->rowvalues + max); 1166 } 1167 1168 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1169 lrow = row - rstart; 1170 1171 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1172 if (!v) {pvA = 0; pvB = 0;} 1173 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1174 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1175 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1176 nztot = nzA + nzB; 1177 1178 cmap = mat->garray; 1179 if (v || idx) { 1180 if (nztot) { 1181 /* Sort by increasing column numbers, assuming A and B already sorted */ 1182 int imark = -1; 1183 if (v) { 1184 *v = v_p = mat->rowvalues; 1185 for ( i=0; i<nzB; i++ ) { 1186 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1187 else break; 1188 } 1189 imark = i; 1190 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1191 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1192 } 1193 if (idx) { 1194 *idx = idx_p = mat->rowindices; 1195 if (imark > -1) { 1196 for ( i=0; i<imark; i++ ) { 1197 idx_p[i] = cmap[cworkB[i]]; 1198 } 1199 } else { 1200 for ( i=0; i<nzB; i++ ) { 1201 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1202 else break; 1203 } 1204 imark = i; 1205 } 1206 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1207 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1208 } 1209 } 1210 else { 1211 if (idx) *idx = 0; 1212 if (v) *v = 0; 1213 } 1214 } 1215 *nz = nztot; 1216 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1217 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNC__ 1222 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1223 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1224 { 1225 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1226 1227 PetscFunctionBegin; 1228 if (aij->getrowactive == PETSC_FALSE) { 1229 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1230 } 1231 aij->getrowactive = PETSC_FALSE; 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNC__ 1236 #define __FUNC__ "MatNorm_MPIAIJ" 1237 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1238 { 1239 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1240 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1241 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1242 double sum = 0.0; 1243 Scalar *v; 1244 1245 PetscFunctionBegin; 1246 if (aij->size == 1) { 1247 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1248 } else { 1249 if (type == NORM_FROBENIUS) { 1250 v = amat->a; 1251 for (i=0; i<amat->nz; i++ ) { 1252 #if defined(USE_PETSC_COMPLEX) 1253 sum += real(conj(*v)*(*v)); v++; 1254 #else 1255 sum += (*v)*(*v); v++; 1256 #endif 1257 } 1258 v = bmat->a; 1259 for (i=0; i<bmat->nz; i++ ) { 1260 #if defined(USE_PETSC_COMPLEX) 1261 sum += real(conj(*v)*(*v)); v++; 1262 #else 1263 sum += (*v)*(*v); v++; 1264 #endif 1265 } 1266 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1267 *norm = sqrt(*norm); 1268 } else if (type == NORM_1) { /* max 
column norm */ 1269 double *tmp, *tmp2; 1270 int *jj, *garray = aij->garray; 1271 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1272 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1273 PetscMemzero(tmp,aij->N*sizeof(double)); 1274 *norm = 0.0; 1275 v = amat->a; jj = amat->j; 1276 for ( j=0; j<amat->nz; j++ ) { 1277 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1278 } 1279 v = bmat->a; jj = bmat->j; 1280 for ( j=0; j<bmat->nz; j++ ) { 1281 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1282 } 1283 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1284 for ( j=0; j<aij->N; j++ ) { 1285 if (tmp2[j] > *norm) *norm = tmp2[j]; 1286 } 1287 PetscFree(tmp); PetscFree(tmp2); 1288 } else if (type == NORM_INFINITY) { /* max row norm */ 1289 double ntemp = 0.0; 1290 for ( j=0; j<amat->m; j++ ) { 1291 v = amat->a + amat->i[j] + shift; 1292 sum = 0.0; 1293 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1294 sum += PetscAbsScalar(*v); v++; 1295 } 1296 v = bmat->a + bmat->i[j] + shift; 1297 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1298 sum += PetscAbsScalar(*v); v++; 1299 } 1300 if (sum > ntemp) ntemp = sum; 1301 } 1302 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1303 } else { 1304 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1305 } 1306 } 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNC__ 1311 #define __FUNC__ "MatTranspose_MPIAIJ" 1312 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1313 { 1314 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1315 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1316 int ierr,shift = Aloc->indexshift; 1317 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1318 Mat B; 1319 Scalar *array; 1320 1321 PetscFunctionBegin; 1322 if (matout == PETSC_NULL && M != N) { 1323 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1324 } 1325 1326 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1327 1328 /* copy over the A part */ 1329 Aloc = (Mat_SeqAIJ*) a->A->data; 1330 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1331 row = a->rstart; 1332 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1333 for ( i=0; i<m; i++ ) { 1334 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1335 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1336 } 1337 aj = Aloc->j; 1338 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1339 1340 /* copy over the B part */ 1341 Aloc = (Mat_SeqAIJ*) a->B->data; 1342 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1343 row = a->rstart; 1344 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1345 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1346 for ( i=0; i<m; i++ ) { 1347 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1348 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1349 } 1350 PetscFree(ct); 1351 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1352 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1353 if (matout != PETSC_NULL) { 1354 *matout = B; 1355 } else { 1356 PetscOps *Abops; 1357 struct _MatOps *Aops; 1358 1359 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1360 PetscFree(a->rowners); 1361 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1362 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1363 if (a->colmap) PetscFree(a->colmap); 1364 if (a->garray) PetscFree(a->garray); 1365 if (a->lvec) VecDestroy(a->lvec); 1366 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1367 PetscFree(a); 1368 1369 /* 1370 This is horrible, horrible code. We need to keep the 1371 A pointers for the bops and ops but copy everything 1372 else from C. 1373 */ 1374 Abops = A->bops; 1375 Aops = A->ops; 1376 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1377 A->bops = Abops; 1378 A->ops = Aops; 1379 PetscHeaderDestroy(B); 1380 } 1381 PetscFunctionReturn(0); 1382 } 1383 1384 #undef __FUNC__ 1385 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1387 { 1388 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1389 Mat a = aij->A, b = aij->B; 1390 int ierr,s1,s2,s3; 1391 1392 PetscFunctionBegin; 1393 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1394 if (rr) { 1395 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1396 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1397 /* Overlap communication with computation. */ 1398 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1399 } 1400 if (ll) { 1401 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1402 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1403 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1404 } 1405 /* scale the diagonal block */ 1406 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1407 1408 if (rr) { 1409 /* Do a scatter end and then right scale the off-diagonal block */ 1410 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1411 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1412 } 1413 1414 PetscFunctionReturn(0); 1415 } 1416 1417 1418 extern int MatPrintHelp_SeqAIJ(Mat); 1419 #undef __FUNC__ 1420 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1421 int MatPrintHelp_MPIAIJ(Mat A) 1422 { 1423 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1424 int ierr; 1425 1426 PetscFunctionBegin; 1427 if (!a->rank) { 1428 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1435 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1436 { 1437 PetscFunctionBegin; 1438 *bs = 1; 1439 PetscFunctionReturn(0); 1440 } 1441 #undef __FUNC__ 1442 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1443 int MatSetUnfactored_MPIAIJ(Mat A) 1444 { 1445 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1446 int ierr; 1447 1448 PetscFunctionBegin; 1449 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNC__ 1454 #define __FUNC__ "MatEqual_MPIAIJ" 1455 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1456 { 1457 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1458 Mat a, b, c, d; 1459 PetscTruth flg; 1460 int ierr; 1461 1462 PetscFunctionBegin; 1463 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1464 a = matA->A; b = matA->B; 1465 c = matB->A; d = matB->B; 1466 1467 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1468 if (flg == PETSC_TRUE) { 1469 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1470 } 1471 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 extern int 
MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
          submatrix (same for all local rows)
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$             0 1 2 3 4 5 6 7 8 9 10 11
$            -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$            -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
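
   Example usage (a minimal sketch; the communicator, the local size m, and
   the preallocation counts 5 and 2 below are illustrative only and should be
   chosen to match the actual nonzero pattern of the application):

$     Mat A;
$     int m = 100, ierr;                /* local rows and columns; illustrative value */
$
$     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DECIDE,PETSC_DECIDE,
$                            5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$     /* ... set entries with MatSetValues(), then ... */
$     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
$     ierr = MatDestroy(A); CHKERRQ(ierr);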
1598 1599 .keywords: matrix, aij, compressed row, sparse, parallel 1600 1601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1602 @*/ 1603 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1604 { 1605 Mat B; 1606 Mat_MPIAIJ *b; 1607 int ierr, i,sum[2],work[2],size; 1608 1609 PetscFunctionBegin; 1610 MPI_Comm_size(comm,&size); 1611 if (size == 1) { 1612 if (M == PETSC_DECIDE) M = m; 1613 if (N == PETSC_DECIDE) N = n; 1614 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1615 PetscFunctionReturn(0); 1616 } 1617 1618 *A = 0; 1619 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1620 PLogObjectCreate(B); 1621 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1622 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1623 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1624 B->ops->destroy = MatDestroy_MPIAIJ; 1625 B->ops->view = MatView_MPIAIJ; 1626 B->factor = 0; 1627 B->assembled = PETSC_FALSE; 1628 B->mapping = 0; 1629 1630 B->insertmode = NOT_SET_VALUES; 1631 b->size = size; 1632 MPI_Comm_rank(comm,&b->rank); 1633 1634 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1635 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1636 } 1637 1638 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1639 work[0] = m; work[1] = n; 1640 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1641 if (M == PETSC_DECIDE) M = sum[0]; 1642 if (N == PETSC_DECIDE) N = sum[1]; 1643 } 1644 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1645 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1646 b->m = m; B->m = m; 1647 b->n = n; B->n = n; 1648 b->N = N; B->N = N; 1649 b->M = M; B->M = M; 1650 1651 /* build local table of row and column ownerships */ 1652 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1653 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1654 b->cowners = b->rowners + b->size + 2; 1655 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1656 b->rowners[0] = 0; 1657 for ( i=2; i<=b->size; i++ ) { 1658 b->rowners[i] += b->rowners[i-1]; 1659 } 1660 b->rstart = b->rowners[b->rank]; 1661 b->rend = b->rowners[b->rank+1]; 1662 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1663 b->cowners[0] = 0; 1664 for ( i=2; i<=b->size; i++ ) { 1665 b->cowners[i] += b->cowners[i-1]; 1666 } 1667 b->cstart = b->cowners[b->rank]; 1668 b->cend = b->cowners[b->rank+1]; 1669 1670 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1671 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1672 PLogObjectParent(B,b->A); 1673 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1674 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1675 PLogObjectParent(B,b->B); 1676 1677 /* build cache for off array entries formed */ 1678 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1679 b->donotstash = 0; 1680 b->colmap = 0; 1681 b->garray = 0; 1682 b->roworiented = 1; 1683 1684 /* stuff used for matrix vector multiply */ 1685 b->lvec = 0; 1686 b->Mvctx = 0; 1687 1688 /* stuff for MatGetRow() */ 1689 b->rowindices = 0; 1690 b->rowvalues = 0; 1691 b->getrowactive = PETSC_FALSE; 1692 1693 *A = B; 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNC__ 1698 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1699 int MatConvertSameType_MPIAIJ(Mat matin,Mat 
*newmat,int cpvalues) 1700 { 1701 Mat mat; 1702 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1703 int ierr, len=0, flg; 1704 1705 PetscFunctionBegin; 1706 *newmat = 0; 1707 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1708 PLogObjectCreate(mat); 1709 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1710 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1711 mat->ops->destroy = MatDestroy_MPIAIJ; 1712 mat->ops->view = MatView_MPIAIJ; 1713 mat->factor = matin->factor; 1714 mat->assembled = PETSC_TRUE; 1715 1716 a->m = mat->m = oldmat->m; 1717 a->n = mat->n = oldmat->n; 1718 a->M = mat->M = oldmat->M; 1719 a->N = mat->N = oldmat->N; 1720 1721 a->rstart = oldmat->rstart; 1722 a->rend = oldmat->rend; 1723 a->cstart = oldmat->cstart; 1724 a->cend = oldmat->cend; 1725 a->size = oldmat->size; 1726 a->rank = oldmat->rank; 1727 mat->insertmode = NOT_SET_VALUES; 1728 a->rowvalues = 0; 1729 a->getrowactive = PETSC_FALSE; 1730 1731 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1732 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1733 a->cowners = a->rowners + a->size + 2; 1734 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1735 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1736 if (oldmat->colmap) { 1737 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1738 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1739 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1740 } else a->colmap = 0; 1741 if (oldmat->garray) { 1742 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1743 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1744 PLogObjectMemory(mat,len*sizeof(int)); 1745 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1746 } else a->garray = 0; 1747 1748 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1749 PLogObjectParent(mat,a->lvec); 1750 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1751 PLogObjectParent(mat,a->Mvctx); 1752 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1753 PLogObjectParent(mat,a->A); 1754 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1755 PLogObjectParent(mat,a->B); 1756 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1757 if (flg) { 1758 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1759 } 1760 *newmat = mat; 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #include "sys.h" 1765 1766 #undef __FUNC__ 1767 #define __FUNC__ "MatLoad_MPIAIJ" 1768 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1769 { 1770 Mat A; 1771 Scalar *vals,*svals; 1772 MPI_Comm comm = ((PetscObject)viewer)->comm; 1773 MPI_Status status; 1774 int i, nz, ierr, j,rstart, rend, fd; 1775 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1776 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1777 int tag = ((PetscObject)viewer)->tag; 1778 1779 PetscFunctionBegin; 1780 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1781 if (!rank) { 1782 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1783 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1784 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1785 if (header[3] < 0) { 1786 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1787 } 1788 } 1789 1790 1791 ierr = 
MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1792 M = header[1]; N = header[2]; 1793 /* determine ownership of all rows */ 1794 m = M/size + ((M % size) > rank); 1795 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1796 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1797 rowners[0] = 0; 1798 for ( i=2; i<=size; i++ ) { 1799 rowners[i] += rowners[i-1]; 1800 } 1801 rstart = rowners[rank]; 1802 rend = rowners[rank+1]; 1803 1804 /* distribute row lengths to all processors */ 1805 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1806 offlens = ourlens + (rend-rstart); 1807 if (!rank) { 1808 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1809 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1810 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1811 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1812 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1813 PetscFree(sndcounts); 1814 } else { 1815 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1816 } 1817 1818 if (!rank) { 1819 /* calculate the number of nonzeros on each processor */ 1820 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1821 PetscMemzero(procsnz,size*sizeof(int)); 1822 for ( i=0; i<size; i++ ) { 1823 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1824 procsnz[i] += rowlengths[j]; 1825 } 1826 } 1827 PetscFree(rowlengths); 1828 1829 /* determine max buffer needed and allocate it */ 1830 maxnz = 0; 1831 for ( i=0; i<size; i++ ) { 1832 maxnz = PetscMax(maxnz,procsnz[i]); 1833 } 1834 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1835 1836 /* read in my part of the matrix column indices */ 1837 nz = procsnz[0]; 1838 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1839 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1840 1841 /* read in every one elses and ship off */ 1842 for ( i=1; i<size; i++ ) { 1843 nz = procsnz[i]; 1844 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1845 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1846 } 1847 PetscFree(cols); 1848 } else { 1849 /* determine buffer space needed for message */ 1850 nz = 0; 1851 for ( i=0; i<m; i++ ) { 1852 nz += ourlens[i]; 1853 } 1854 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1855 1856 /* receive message of column indices*/ 1857 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1858 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1859 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1860 } 1861 1862 /* loop over local rows, determining number of off diagonal entries */ 1863 PetscMemzero(offlens,m*sizeof(int)); 1864 jj = 0; 1865 for ( i=0; i<m; i++ ) { 1866 for ( j=0; j<ourlens[i]; j++ ) { 1867 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1868 jj++; 1869 } 1870 } 1871 1872 /* create our matrix */ 1873 for ( i=0; i<m; i++ ) { 1874 ourlens[i] -= offlens[i]; 1875 } 1876 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1877 A = *newmat; 1878 MatSetOption(A,MAT_COLUMNS_SORTED); 1879 for ( i=0; i<m; i++ ) { 1880 ourlens[i] += offlens[i]; 1881 } 1882 1883 if (!rank) { 1884 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1885 1886 /* read in my part of the matrix 
numerical values */
1887     nz = procsnz[0];
1888     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1889 
1890     /* insert into matrix */
1891     jj      = rstart;
1892     smycols = mycols;
1893     svals   = vals;
1894     for ( i=0; i<m; i++ ) {
1895       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1896       smycols += ourlens[i];
1897       svals   += ourlens[i];
1898       jj++;
1899     }
1900 
1901     /* read in the other processors' parts and ship them out */
1902     for ( i=1; i<size; i++ ) {
1903       nz   = procsnz[i];
1904       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1905       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1906     }
1907     PetscFree(procsnz);
1908   } else {
1909     /* receive numeric values */
1910     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1911 
1912     /* receive message of values */
1913     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1914     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1915     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
1916 
1917     /* insert into matrix */
1918     jj      = rstart;
1919     smycols = mycols;
1920     svals   = vals;
1921     for ( i=0; i<m; i++ ) {
1922       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1923       smycols += ourlens[i];
1924       svals   += ourlens[i];
1925       jj++;
1926     }
1927   }
1928   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1929 
1930   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1931   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNC__
1936 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
1937 /*
1938     Not great, since it makes two copies of the submatrix: first a SeqAIJ
1939     copy in local, and then the end result by concatenating the local matrices.
1940 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1941 */ 1942 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1943 { 1944 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1945 Mat *local,M; 1946 Scalar *vwork,*aa; 1947 MPI_Comm comm = mat->comm; 1948 Mat_SeqAIJ *aij; 1949 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1950 1951 PetscFunctionBegin; 1952 MPI_Comm_rank(comm,&rank); 1953 MPI_Comm_size(comm,&size); 1954 1955 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1956 1957 /* 1958 m - number of local rows 1959 n - number of columns (same on all processors) 1960 rstart - first row in new global matrix generated 1961 */ 1962 ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr); 1963 if (call == MAT_INITIAL_MATRIX) { 1964 aij = (Mat_SeqAIJ *) (*local)->data; 1965 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 1966 ii = aij->i; 1967 jj = aij->j; 1968 1969 /* 1970 Determine the number of non-zeros in the diagonal and off-diagonal 1971 portions of the matrix in order to do correct preallocation 1972 */ 1973 1974 /* first get start and end of "diagonal" columns */ 1975 if (csize == PETSC_DECIDE) { 1976 nlocal = n/size + ((n % size) > rank); 1977 } else { 1978 nlocal = csize; 1979 } 1980 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1981 rstart = rend - nlocal; 1982 if (rank == size - 1 && rend != n) { 1983 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 1984 } 1985 1986 /* next, compute all the lengths */ 1987 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 1988 olens = dlens + m; 1989 for ( i=0; i<m; i++ ) { 1990 jend = ii[i+1] - ii[i]; 1991 olen = 0; 1992 dlen = 0; 1993 for ( j=0; j<jend; j++ ) { 1994 if ( *jj < rstart || *jj >= rend) olen++; 1995 else dlen++; 1996 jj++; 1997 } 1998 olens[i] = olen; 1999 dlens[i] = dlen; 2000 } 2001 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2002 PetscFree(dlens); 2003 } else { 2004 int ml,nl; 2005 2006 M = *newmat; 2007 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2008 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2009 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2010 } 2011 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2012 aij = (Mat_SeqAIJ *) (*local)->data; 2013 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2014 ii = aij->i; 2015 jj = aij->j; 2016 aa = aij->a; 2017 for (i=0; i<m; i++) { 2018 row = rstart + i; 2019 nz = ii[i+1] - ii[i]; 2020 cwork = jj; jj += nz; 2021 vwork = aa; aa += nz; 2022 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2023 } 2024 2025 ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr); 2026 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2027 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2028 *newmat = M; 2029 PetscFunctionReturn(0); 2030 } 2031
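
/*
    The block-row (and block-column) partitioning used above in both
    MatCreateMPIAIJ() and MatLoad_MPIAIJ() follows one pattern: each processor
    takes M/size rows, the first M % size processors take one extra row (so the
    local sizes differ by at most one), and the ownership ranges are then
    obtained by gathering all local counts and forming a running sum.  The
    fragment below is only an illustrative sketch of that pattern in isolation;
    ComputeOwnership_Sample() is a hypothetical helper, not part of the library,
    and the block is disabled so that it does not affect the build.
*/
#if 0
static int ComputeOwnership_Sample(MPI_Comm comm,int M,int *rstart,int *rend)
{
  int ierr,size,rank,m,i,*rowners;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);

  /* local row count: the first (M % size) processors get one extra row */
  m = M/size + ((M % size) > rank);

  /* gather every processor's count, then turn the counts into a running sum */
  rowners = (int *) PetscMalloc((size+1)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  *rstart = rowners[rank];     /* first row owned by this processor */
  *rend   = rowners[rank+1];   /* one past the last row owned */
  PetscFree(rowners);
  PetscFunctionReturn(0);
}
#endif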