1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.226 1997/11/07 01:58:23 bsmith Exp bsmith $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } 238 else col = in[j]; 239 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 240 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 241 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 242 } 243 } 244 } 245 else { 246 if (roworiented && !aij->donotstash) { 247 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 248 } 249 else { 250 if (!aij->donotstash) { 251 row = im[i]; 252 for ( j=0; j<n; j++ ) { 253 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 254 } 255 } 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatGetValues_MPIAIJ" 264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265 { 266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268 int cstart = aij->cstart, cend = aij->cend,row,col; 269 270 PetscFunctionBegin; 271 for ( i=0; i<m; i++ ) { 272 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 273 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 274 if (idxm[i] >= rstart && idxm[i] < rend) { 275 row = idxm[i] - rstart; 276 for ( j=0; j<n; j++ ) { 277 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 278 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 279 if (idxn[j] >= cstart && idxn[j] < cend){ 280 col = idxn[j] - cstart; 281 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 282 } 283 else { 284 if (!aij->colmap) { 285 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 286 } 287 col = aij->colmap[idxn[j]] - 1; 288 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 289 else { 290 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } 292 } 293 } 294 } 295 else { 296 SETERRQ(1,0,"Only local values currently supported"); 297 } 298 } 299 PetscFunctionReturn(0); 300 } 301 302 #undef __FUNC__ 303 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 304 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 307 MPI_Comm comm = mat->comm; 308 int size = aij->size, *owners = aij->rowners; 309 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 310 MPI_Request *send_waits,*recv_waits; 311 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 312 InsertMode addv; 313 Scalar *rvalues,*svalues; 314 315 PetscFunctionBegin; 316 /* make sure all processors are either in INSERTMODE or ADDMODE */ 317 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 318 if (addv == (ADD_VALUES|INSERT_VALUES)) { 319 SETERRQ(1,0,"Some processors inserted others added"); 320 } 321 mat->insertmode = addv; /* in case this processor had no cache */ 322 323 /* first count number of contributors to each processor */ 324 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 325 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 326 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 327 for ( i=0; i<aij->stash.n; i++ ) { 328 idx = aij->stash.idx[i]; 329 for ( j=0; j<size; j++ ) { 330 if (idx >= owners[j] && idx < owners[j+1]) { 331 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 332 } 333 } 334 } 335 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 336 337 /* inform other processors of number of messages and max length*/ 338 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 339 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 340 nreceives = work[rank]; 341 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 342 nmax = work[rank]; 343 PetscFree(work); 344 345 /* post receives: 346 1) each message will consist of ordered pairs 347 (global index,value) we store the global index as a double 348 to simplify the message passing. 349 2) since we don't know how long each individual message is we 350 allocate the largest needed buffer for each receive. Potentially 351 this is a lot of wasted space. 352 353 354 This could be done better. 355 */ 356 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 357 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 358 for ( i=0; i<nreceives; i++ ) { 359 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 360 comm,recv_waits+i);CHKERRQ(ierr); 361 } 362 363 /* do sends: 364 1) starts[i] gives the starting index in svalues for stuff going to 365 the ith processor 366 */ 367 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 368 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 369 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 370 starts[0] = 0; 371 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 372 for ( i=0; i<aij->stash.n; i++ ) { 373 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 374 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 375 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 376 } 377 PetscFree(owner); 378 starts[0] = 0; 379 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 380 count = 0; 381 for ( i=0; i<size; i++ ) { 382 if (procs[i]) { 383 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 384 } 385 } 386 PetscFree(starts); PetscFree(nprocs); 387 388 /* Free cache space */ 389 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 390 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 391 392 aij->svalues = svalues; aij->rvalues = rvalues; 393 aij->nsends = nsends; aij->nrecvs = nreceives; 394 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 395 aij->rmax = nmax; 396 397 PetscFunctionReturn(0); 398 } 399 extern int MatSetUpMultiply_MPIAIJ(Mat); 400 401 #undef __FUNC__ 402 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 403 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 404 { 405 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 406 MPI_Status *send_status,recv_status; 407 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 408 int row,col,other_disassembled; 409 Scalar *values,val; 410 InsertMode addv = mat->insertmode; 411 412 PetscFunctionBegin; 413 /* wait on receives */ 414 while (count) { 415 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 416 /* unpack receives into our local space */ 417 values = aij->rvalues + 3*imdex*aij->rmax; 418 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 419 n = n/3; 420 for ( i=0; i<n; i++ ) { 421 row = (int) PetscReal(values[3*i]) - aij->rstart; 422 col = (int) PetscReal(values[3*i+1]); 423 val = values[3*i+2]; 424 if (col >= aij->cstart && col < aij->cend) { 425 col -= aij->cstart; 426 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 427 } else { 428 if (mat->was_assembled || mat->assembled) { 429 if (!aij->colmap) { 430 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 431 } 432 col = aij->colmap[col] - 1; 433 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 434 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 435 col = (int) PetscReal(values[3*i+1]); 436 } 437 } 438 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 439 } 440 } 441 count--; 442 } 443 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 444 445 /* wait on sends */ 446 if (aij->nsends) { 447 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 448 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 449 PetscFree(send_status); 450 } 451 PetscFree(aij->send_waits); PetscFree(aij->svalues); 452 453 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 454 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 455 456 /* determine if any processor has disassembled, if so we must 457 also disassemble ourselfs, in order that we may reassemble. */ 458 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 459 if (mat->was_assembled && !other_disassembled) { 460 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 461 } 462 463 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 464 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 465 } 466 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 467 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 468 469 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatZeroEntries_MPIAIJ" 475 int MatZeroEntries_MPIAIJ(Mat A) 476 { 477 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 478 int ierr; 479 480 PetscFunctionBegin; 481 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 482 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 483 PetscFunctionReturn(0); 484 } 485 486 /* the code does not do the diagonal entries correctly unless the 487 matrix is square and the column and row owerships are identical. 488 This is a BUG. The only way to fix it seems to be to access 489 aij->A and aij->B directly and not through the MatZeroRows() 490 routine. 491 */ 492 #undef __FUNC__ 493 #define __FUNC__ "MatZeroRows_MPIAIJ" 494 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 495 { 496 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 497 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 498 int *procs,*nprocs,j,found,idx,nsends,*work; 499 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 500 int *rvalues,tag = A->tag,count,base,slen,n,*source; 501 int *lens,imdex,*lrows,*values; 502 MPI_Comm comm = A->comm; 503 MPI_Request *send_waits,*recv_waits; 504 MPI_Status recv_status,*send_status; 505 IS istmp; 506 507 PetscFunctionBegin; 508 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 509 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 510 511 /* first count number of contributors to each processor */ 512 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 513 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 514 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 515 for ( i=0; i<N; i++ ) { 516 idx = rows[i]; 517 found = 0; 518 for ( j=0; j<size; j++ ) { 519 if (idx >= owners[j] && idx < owners[j+1]) { 520 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 521 } 522 } 523 if (!found) SETERRQ(1,0,"Index out of range"); 524 } 525 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 526 527 /* inform other processors of number of messages and max length*/ 528 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 529 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 530 nrecvs = work[rank]; 531 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 532 nmax = work[rank]; 533 PetscFree(work); 534 535 /* post receives: */ 536 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 537 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 538 for ( i=0; i<nrecvs; i++ ) { 539 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 540 } 541 542 /* do sends: 543 1) starts[i] gives the starting index in svalues for stuff going to 544 the ith processor 545 */ 546 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 547 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 548 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 549 starts[0] = 0; 550 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 551 for ( i=0; i<N; i++ ) { 552 svalues[starts[owner[i]]++] = rows[i]; 553 } 554 ISRestoreIndices(is,&rows); 555 556 starts[0] = 0; 557 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 558 count = 0; 559 for ( i=0; i<size; i++ ) { 560 if (procs[i]) { 561 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 562 } 563 } 564 PetscFree(starts); 565 566 base = owners[rank]; 567 568 /* wait on receives */ 569 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 570 source = lens + nrecvs; 571 count = nrecvs; slen = 0; 572 while (count) { 573 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 574 /* unpack receives into our local space */ 575 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 576 source[imdex] = recv_status.MPI_SOURCE; 577 lens[imdex] = n; 578 slen += n; 579 count--; 580 } 581 PetscFree(recv_waits); 582 583 /* move the data into the send scatter */ 584 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 585 count = 0; 586 for ( i=0; i<nrecvs; i++ ) { 587 values = rvalues + i*nmax; 588 for ( j=0; j<lens[i]; j++ ) { 589 lrows[count++] = values[j] - base; 590 } 591 } 592 PetscFree(rvalues); PetscFree(lens); 593 PetscFree(owner); PetscFree(nprocs); 594 595 /* actually zap the local rows */ 596 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 597 PLogObjectParent(A,istmp); 598 PetscFree(lrows); 599 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 600 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 601 ierr = ISDestroy(istmp); CHKERRQ(ierr); 602 603 /* wait on sends */ 604 if (nsends) { 605 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 606 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 607 PetscFree(send_status); 608 } 609 PetscFree(send_waits); PetscFree(svalues); 610 611 PetscFunctionReturn(0); 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatMult_MPIAIJ" 616 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 617 { 618 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 619 int ierr,nt; 620 621 PetscFunctionBegin; 622 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 623 if (nt != a->n) { 624 SETERRQ(1,0,"Incompatible partition of A and xx"); 625 } 626 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 627 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 628 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 629 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNC__ 634 #define __FUNC__ "MatMultAdd_MPIAIJ" 635 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 636 { 637 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 638 int ierr; 639 640 PetscFunctionBegin; 641 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 642 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 643 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 644 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647 648 #undef __FUNC__ 649 #define __FUNC__ "MatMultTrans_MPIAIJ" 650 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 651 { 652 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 653 int ierr; 654 655 PetscFunctionBegin; 656 /* do nondiagonal part */ 657 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 658 /* send it on its way */ 659 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 660 /* do local part */ 661 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 662 /* receive remote parts: note this assumes the values are not actually */ 663 /* inserted in yy until the next line, which is true for my implementation*/ 664 /* but is not perhaps always true. */ 665 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNC__ 670 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 671 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 674 int ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. */ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNC__ 695 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 696 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 int ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 700 701 PetscFunctionBegin; 702 if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(1,0,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNC__ 711 #define __FUNC__ "MatScale_MPIAIJ" 712 int MatScale_MPIAIJ(Scalar *aa,Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 715 int ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNC__ 724 #define __FUNC__ "MatDestroy_MPIAIJ" 725 int MatDestroy_MPIAIJ(PetscObject obj) 726 { 727 Mat mat = (Mat) obj; 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 729 int ierr; 730 731 PetscFunctionBegin; 732 #if defined(USE_PETSC_LOG) 733 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 734 #endif 735 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 736 PetscFree(aij->rowners); 737 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 738 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 739 if (aij->colmap) PetscFree(aij->colmap); 740 if (aij->garray) PetscFree(aij->garray); 741 if (aij->lvec) VecDestroy(aij->lvec); 742 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 743 if (aij->rowvalues) PetscFree(aij->rowvalues); 744 PetscFree(aij); 745 PLogObjectDestroy(mat); 746 PetscHeaderDestroy(mat); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNC__ 751 #define __FUNC__ "MatView_MPIAIJ_Binary" 752 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 753 { 754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 755 int ierr; 756 757 PetscFunctionBegin; 758 if (aij->size == 1) { 759 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 760 } 761 else SETERRQ(1,0,"Only uniprocessor output supported"); 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNC__ 766 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 767 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 768 { 769 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 770 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 771 int ierr, format,shift = C->indexshift,rank; 772 FILE *fd; 773 ViewerType vtype; 774 775 PetscFunctionBegin; 776 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 777 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 778 ierr = ViewerGetFormat(viewer,&format); 779 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 780 MatInfo info; 781 int flg; 782 MPI_Comm_rank(mat->comm,&rank); 783 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 784 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 785 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 786 PetscSequentialPhaseBegin(mat->comm,1); 787 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 788 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 789 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 790 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 791 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 792 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 793 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 794 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 795 fflush(fd); 796 PetscSequentialPhaseEnd(mat->comm,1); 797 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 800 PetscFunctionReturn(0); 801 } 802 } 803 804 if (vtype == DRAW_VIEWER) { 805 Draw draw; 806 PetscTruth isnull; 807 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 808 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 809 } 810 811 if (vtype == ASCII_FILE_VIEWER) { 812 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 813 PetscSequentialPhaseBegin(mat->comm,1); 814 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 815 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 816 aij->cend); 817 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 818 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 819 fflush(fd); 820 PetscSequentialPhaseEnd(mat->comm,1); 821 } else { 822 int size = aij->size; 823 rank = aij->rank; 824 if (size == 1) { 825 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 826 } else { 827 /* assemble the entire matrix onto first processor. */ 828 Mat A; 829 Mat_SeqAIJ *Aloc; 830 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 831 Scalar *a; 832 833 if (!rank) { 834 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 835 } else { 836 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 837 } 838 PLogObjectParent(mat,A); 839 840 /* copy over the A part */ 841 Aloc = (Mat_SeqAIJ*) aij->A->data; 842 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 843 row = aij->rstart; 844 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 845 for ( i=0; i<m; i++ ) { 846 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 847 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 848 } 849 aj = Aloc->j; 850 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 851 852 /* copy over the B part */ 853 Aloc = (Mat_SeqAIJ*) aij->B->data; 854 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 855 row = aij->rstart; 856 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 857 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 858 for ( i=0; i<m; i++ ) { 859 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 860 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 861 } 862 PetscFree(ct); 863 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 865 /* 866 Everyone has to call to draw the matrix since the graphics waits are 867 synchronized across all processors that share the Draw object 868 */ 869 if (!rank || vtype == DRAW_VIEWER) { 870 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 871 } 872 ierr = MatDestroy(A); CHKERRQ(ierr); 873 } 874 } 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNC__ 879 #define __FUNC__ "MatView_MPIAIJ" 880 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 881 { 882 Mat mat = (Mat) obj; 883 int ierr; 884 ViewerType vtype; 885 886 PetscFunctionBegin; 887 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 888 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 889 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 890 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 891 } 892 else if (vtype == BINARY_FILE_VIEWER) { 893 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 894 } 895 PetscFunctionReturn(0); 896 } 897 898 /* 899 This has to provide several versions. 900 901 2) a) use only local smoothing updating outer values only once. 902 b) local smoothing updating outer values each inner iteration 903 3) color updating out values betwen colors. 904 */ 905 #undef __FUNC__ 906 #define __FUNC__ "MatRelax_MPIAIJ" 907 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 908 double fshift,int its,Vec xx) 909 { 910 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 911 Mat AA = mat->A, BB = mat->B; 912 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 913 Scalar *b,*x,*xs,*ls,d,*v,sum; 914 int ierr,*idx, *diag; 915 int n = mat->n, m = mat->m, i,shift = A->indexshift; 916 917 PetscFunctionBegin; 918 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 919 xs = x + shift; /* shift by one for index start of 1 */ 920 ls = ls + shift; 921 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 922 diag = A->diag; 923 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 924 if (flag & SOR_ZERO_INITIAL_GUESS) { 925 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 929 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 930 while (its--) { 931 /* go down through the rows */ 932 for ( i=0; i<m; i++ ) { 933 n = A->i[i+1] - A->i[i]; 934 idx = A->j + A->i[i] + shift; 935 v = A->a + A->i[i] + shift; 936 sum = b[i]; 937 SPARSEDENSEMDOT(sum,xs,v,idx,n); 938 d = fshift + A->a[diag[i]+shift]; 939 n = B->i[i+1] - B->i[i]; 940 idx = B->j + B->i[i] + shift; 941 v = B->a + B->i[i] + shift; 942 SPARSEDENSEMDOT(sum,ls,v,idx,n); 943 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 944 } 945 /* come up through the rows */ 946 for ( i=m-1; i>-1; i-- ) { 947 n = A->i[i+1] - A->i[i]; 948 idx = A->j + A->i[i] + shift; 949 v = A->a + A->i[i] + shift; 950 sum = b[i]; 951 SPARSEDENSEMDOT(sum,xs,v,idx,n); 952 d = fshift + A->a[diag[i]+shift]; 953 n = B->i[i+1] - B->i[i]; 954 idx = B->j + B->i[i] + shift; 955 v = B->a + B->i[i] + shift; 956 SPARSEDENSEMDOT(sum,ls,v,idx,n); 957 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 958 } 959 } 960 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 967 while (its--) { 968 for ( i=0; i<m; i++ ) { 969 n = A->i[i+1] - A->i[i]; 970 idx = A->j + A->i[i] + shift; 971 v = A->a + A->i[i] + shift; 972 sum = b[i]; 973 SPARSEDENSEMDOT(sum,xs,v,idx,n); 974 d = fshift + A->a[diag[i]+shift]; 975 n = B->i[i+1] - B->i[i]; 976 idx = B->j + B->i[i] + shift; 977 v = B->a + B->i[i] + shift; 978 SPARSEDENSEMDOT(sum,ls,v,idx,n); 979 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 980 } 981 } 982 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 983 if (flag & SOR_ZERO_INITIAL_GUESS) { 984 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } 987 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 988 mat->Mvctx); CHKERRQ(ierr); 989 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 990 mat->Mvctx); CHKERRQ(ierr); 991 while (its--) { 992 for ( i=m-1; i>-1; i-- ) { 993 n = A->i[i+1] - A->i[i]; 994 idx = A->j + A->i[i] + shift; 995 v = A->a + A->i[i] + shift; 996 sum = b[i]; 997 SPARSEDENSEMDOT(sum,xs,v,idx,n); 998 d = fshift + A->a[diag[i]+shift]; 999 n = B->i[i+1] - B->i[i]; 1000 idx = B->j + B->i[i] + shift; 1001 v = B->a + B->i[i] + shift; 1002 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1003 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1004 } 1005 } 1006 } else { 1007 SETERRQ(1,0,"Parallel SOR not supported"); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNC__ 1013 #define __FUNC__ "MatGetInfo_MPIAIJ" 1014 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1015 { 1016 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1017 Mat A = mat->A, B = mat->B; 1018 int ierr; 1019 double isend[5], irecv[5]; 1020 1021 PetscFunctionBegin; 1022 info->block_size = 1.0; 1023 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1024 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1025 isend[3] = info->memory; isend[4] = info->mallocs; 1026 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1027 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1028 isend[3] += info->memory; isend[4] += info->mallocs; 1029 if (flag == MAT_LOCAL) { 1030 info->nz_used = isend[0]; 1031 info->nz_allocated = isend[1]; 1032 info->nz_unneeded = isend[2]; 1033 info->memory = isend[3]; 1034 info->mallocs = isend[4]; 1035 } else if (flag == MAT_GLOBAL_MAX) { 1036 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1037 info->nz_used = irecv[0]; 1038 info->nz_allocated = irecv[1]; 1039 info->nz_unneeded = irecv[2]; 1040 info->memory = irecv[3]; 1041 info->mallocs = irecv[4]; 1042 } else if (flag == MAT_GLOBAL_SUM) { 1043 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1044 info->nz_used = irecv[0]; 1045 info->nz_allocated = irecv[1]; 1046 info->nz_unneeded = irecv[2]; 1047 info->memory = irecv[3]; 1048 info->mallocs = irecv[4]; 1049 } 1050 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1051 info->fill_ratio_needed = 0; 1052 info->factor_mallocs = 0; 1053 info->rows_global = (double)mat->M; 1054 info->columns_global = (double)mat->N; 1055 info->rows_local = (double)mat->m; 1056 info->columns_local = (double)mat->N; 1057 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNC__ 1062 #define __FUNC__ "MatSetOption_MPIAIJ" 1063 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1064 { 1065 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1066 1067 PetscFunctionBegin; 1068 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1069 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1070 op == MAT_COLUMNS_UNSORTED || 1071 op == MAT_COLUMNS_SORTED || 1072 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1073 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1074 MatSetOption(a->A,op); 1075 MatSetOption(a->B,op); 1076 } else if (op == MAT_ROW_ORIENTED) { 1077 a->roworiented = 1; 1078 MatSetOption(a->A,op); 1079 MatSetOption(a->B,op); 1080 } else if (op == MAT_ROWS_SORTED || 1081 op == MAT_ROWS_UNSORTED || 1082 op == MAT_SYMMETRIC || 1083 op == MAT_STRUCTURALLY_SYMMETRIC || 1084 op == MAT_YES_NEW_DIAGONALS) 1085 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1086 else if (op == MAT_COLUMN_ORIENTED) { 1087 a->roworiented = 0; 1088 MatSetOption(a->A,op); 1089 MatSetOption(a->B,op); 1090 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1091 a->donotstash = 1; 1092 } else if (op == MAT_NO_NEW_DIAGONALS){ 1093 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1094 } else { 1095 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1096 } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNC__ 1101 #define __FUNC__ "MatGetSize_MPIAIJ" 1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1103 { 1104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1105 1106 PetscFunctionBegin; 1107 if (m) *m = mat->M; 1108 if (n) *n = mat->N; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1114 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1115 { 1116 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1117 1118 PetscFunctionBegin; 1119 if (m) *m = mat->m; 1120 if (n) *n = mat->N; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNC__ 1125 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1127 { 1128 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1129 1130 PetscFunctionBegin; 1131 *m = mat->rstart; *n = mat->rend; 1132 PetscFunctionReturn(0); 1133 } 1134 1135 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1136 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1137 1138 #undef __FUNC__ 1139 #define __FUNC__ "MatGetRow_MPIAIJ" 1140 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1141 { 1142 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1143 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1144 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1145 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1146 int *cmap, *idx_p; 1147 1148 PetscFunctionBegin; 1149 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1150 mat->getrowactive = PETSC_TRUE; 1151 1152 if (!mat->rowvalues && (idx || v)) { 1153 /* 1154 allocate enough space to hold information from the longest row. 1155 */ 1156 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1157 int max = 1,m = mat->m,tmp; 1158 for ( i=0; i<m; i++ ) { 1159 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1160 if (max < tmp) { max = tmp; } 1161 } 1162 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1163 CHKPTRQ(mat->rowvalues); 1164 mat->rowindices = (int *) (mat->rowvalues + max); 1165 } 1166 1167 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1168 lrow = row - rstart; 1169 1170 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1171 if (!v) {pvA = 0; pvB = 0;} 1172 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1173 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1174 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1175 nztot = nzA + nzB; 1176 1177 cmap = mat->garray; 1178 if (v || idx) { 1179 if (nztot) { 1180 /* Sort by increasing column numbers, assuming A and B already sorted */ 1181 int imark = -1; 1182 if (v) { 1183 *v = v_p = mat->rowvalues; 1184 for ( i=0; i<nzB; i++ ) { 1185 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1186 else break; 1187 } 1188 imark = i; 1189 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1190 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1191 } 1192 if (idx) { 1193 *idx = idx_p = mat->rowindices; 1194 if (imark > -1) { 1195 for ( i=0; i<imark; i++ ) { 1196 idx_p[i] = cmap[cworkB[i]]; 1197 } 1198 } else { 1199 for ( i=0; i<nzB; i++ ) { 1200 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1201 else break; 1202 } 1203 imark = i; 1204 } 1205 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1206 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1207 } 1208 } 1209 else { 1210 if (idx) *idx = 0; 1211 if (v) *v = 0; 1212 } 1213 } 1214 *nz = nztot; 1215 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1216 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1222 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1223 { 1224 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1225 1226 PetscFunctionBegin; 1227 if (aij->getrowactive == PETSC_FALSE) { 1228 SETERRQ(1,0,"MatGetRow not called"); 1229 } 1230 aij->getrowactive = PETSC_FALSE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNC__ 1235 #define __FUNC__ "MatNorm_MPIAIJ" 1236 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1239 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1240 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1241 double sum = 0.0; 1242 Scalar *v; 1243 1244 PetscFunctionBegin; 1245 if (aij->size == 1) { 1246 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1247 } else { 1248 if (type == NORM_FROBENIUS) { 1249 v = amat->a; 1250 for (i=0; i<amat->nz; i++ ) { 1251 #if defined(USE_PETSC_COMPLEX) 1252 sum += real(conj(*v)*(*v)); v++; 1253 #else 1254 sum += (*v)*(*v); v++; 1255 #endif 1256 } 1257 v = bmat->a; 1258 for (i=0; i<bmat->nz; i++ ) { 1259 #if defined(USE_PETSC_COMPLEX) 1260 sum += real(conj(*v)*(*v)); v++; 1261 #else 1262 sum += (*v)*(*v); v++; 1263 #endif 1264 } 1265 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1266 *norm = sqrt(*norm); 1267 } else if (type == NORM_1) { /* max column norm */ 1268 double *tmp, *tmp2; 1269 int *jj, *garray = aij->garray; 1270 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1271 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1272 PetscMemzero(tmp,aij->N*sizeof(double)); 1273 *norm = 0.0; 1274 v = amat->a; jj = amat->j; 1275 for ( j=0; j<amat->nz; j++ ) { 1276 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1277 } 1278 v = bmat->a; jj = bmat->j; 1279 for ( j=0; j<bmat->nz; j++ ) { 1280 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1281 } 1282 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1283 for ( j=0; j<aij->N; j++ ) { 1284 if (tmp2[j] > *norm) *norm = tmp2[j]; 1285 } 1286 PetscFree(tmp); PetscFree(tmp2); 1287 } else if (type == NORM_INFINITY) { /* max row norm */ 1288 double ntemp = 0.0; 1289 for ( j=0; j<amat->m; j++ ) { 1290 v = amat->a + amat->i[j] + shift; 1291 sum = 0.0; 1292 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1293 sum += PetscAbsScalar(*v); v++; 1294 } 1295 v = bmat->a + bmat->i[j] + shift; 1296 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1297 sum += PetscAbsScalar(*v); v++; 1298 } 1299 if (sum > ntemp) ntemp = sum; 1300 } 1301 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1302 } else { 1303 SETERRQ(1,0,"No support for two norm"); 1304 } 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNC__ 1310 #define __FUNC__ "MatTranspose_MPIAIJ" 1311 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1312 { 1313 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1314 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1315 int ierr,shift = Aloc->indexshift; 1316 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1317 Mat B; 1318 Scalar *array; 1319 1320 PetscFunctionBegin; 1321 if (matout == PETSC_NULL && M != N) { 1322 SETERRQ(1,0,"Square matrix only for in-place"); 1323 } 1324 1325 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1326 1327 /* copy over the A part */ 1328 Aloc = (Mat_SeqAIJ*) a->A->data; 1329 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1330 row = a->rstart; 1331 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1332 for ( i=0; i<m; i++ ) { 1333 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1334 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1335 } 1336 aj = Aloc->j; 1337 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1338 1339 /* copy over the B part */ 1340 Aloc = (Mat_SeqAIJ*) a->B->data; 1341 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1342 row = a->rstart; 1343 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1344 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1345 for ( i=0; i<m; i++ ) { 1346 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1347 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1348 } 1349 PetscFree(ct); 1350 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1351 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1352 if (matout != PETSC_NULL) { 1353 *matout = B; 1354 } else { 1355 /* This isn't really an in-place transpose .... but free data structures from a */ 1356 PetscFree(a->rowners); 1357 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1358 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1359 if (a->colmap) PetscFree(a->colmap); 1360 if (a->garray) PetscFree(a->garray); 1361 if (a->lvec) VecDestroy(a->lvec); 1362 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1363 PetscFree(a); 1364 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1365 PetscHeaderDestroy(B); 1366 } 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNC__ 1371 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1372 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1373 { 1374 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1375 Mat a = aij->A, b = aij->B; 1376 int ierr,s1,s2,s3; 1377 1378 PetscFunctionBegin; 1379 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1380 if (rr) { 1381 s3 = aij->n; 1382 VecGetLocalSize_Fast(rr,s1); 1383 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1384 /* Overlap communication with computation. */ 1385 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1386 } 1387 if (ll) { 1388 VecGetLocalSize_Fast(ll,s1); 1389 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1390 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1391 } 1392 /* scale the diagonal block */ 1393 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1394 1395 if (rr) { 1396 /* Do a scatter end and then right scale the off-diagonal block */ 1397 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1398 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1399 } 1400 1401 PetscFunctionReturn(0); 1402 } 1403 1404 1405 extern int MatPrintHelp_SeqAIJ(Mat); 1406 #undef __FUNC__ 1407 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1408 int MatPrintHelp_MPIAIJ(Mat A) 1409 { 1410 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1411 int ierr; 1412 1413 PetscFunctionBegin; 1414 if (!a->rank) { 1415 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1416 } 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNC__ 1421 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1422 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1423 { 1424 PetscFunctionBegin; 1425 *bs = 1; 1426 PetscFunctionReturn(0); 1427 } 1428 #undef __FUNC__ 1429 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1430 int MatSetUnfactored_MPIAIJ(Mat A) 1431 { 1432 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1433 int ierr; 1434 1435 PetscFunctionBegin; 1436 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNC__ 1441 #define __FUNC__ "MatEqual_MPIAIJ" 1442 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1443 { 1444 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1445 Mat a, b, c, d; 1446 PetscTruth flg; 1447 int ierr; 1448 1449 PetscFunctionBegin; 1450 if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type"); 1451 a = matA->A; b = matA->B; 1452 c = matB->A; d = matB->B; 1453 1454 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1455 if (flg == PETSC_TRUE) { 1456 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1457 } 1458 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } 1461 1462 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1463 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1464 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1465 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1466 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *); 1467 1468 /* -------------------------------------------------------------------*/ 1469 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1470 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1471 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1472 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1473 0,0, 1474 0,0, 1475 0,0, 1476 MatRelax_MPIAIJ, 1477 MatTranspose_MPIAIJ, 1478 MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1479 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1480 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1481 0, 1482 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1483 0,0,0,0, 1484 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1485 0,0, 1486 0,0,MatConvertSameType_MPIAIJ,0,0, 1487 0,0,0, 1488 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1489 MatPrintHelp_MPIAIJ, 1490 MatScale_MPIAIJ,0,0,0, 1491 MatGetBlockSize_MPIAIJ,0,0,0,0, 1492 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ, 1493 0,0,MatGetSubMatrix_MPIAIJ}; 1494 1495 1496 #undef __FUNC__ 1497 #define __FUNC__ "MatCreateMPIAIJ" 1498 /*@C 1499 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1500 (the default parallel PETSc format). For good matrix assembly performance 1501 the user should preallocate the matrix storage by setting the parameters 1502 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1503 performance can be increased by more than a factor of 50. 1504 1505 Input Parameters: 1506 . comm - MPI communicator 1507 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1508 This value should be the same as the local size used in creating the 1509 y vector for the matrix-vector product y = Ax. 1510 . n - This value should be the same as the local size used in creating the 1511 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1512 calculated if N is given) For square matrices n is almost always m. 1513 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1514 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1515 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1516 (same for all local rows) 1517 . d_nzz - array containing the number of nonzeros in the various rows of the 1518 diagonal portion of the local submatrix (possibly different for each row) 1519 or PETSC_NULL. You must leave room for the diagonal entry even if 1520 it is zero. 1521 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1522 submatrix (same for all local rows). 1523 . o_nzz - array containing the number of nonzeros in the various rows of the 1524 off-diagonal portion of the local submatrix (possibly different for 1525 each row) or PETSC_NULL. 1526 1527 Output Parameter: 1528 . A - the matrix 1529 1530 Notes: 1531 The AIJ format (also called the Yale sparse matrix format or 1532 compressed row storage), is fully compatible with standard Fortran 77 1533 storage. That is, the stored row and column indices can begin at 1534 either one (as in Fortran) or zero. See the users manual for details. 1535 1536 The user MUST specify either the local or global matrix dimensions 1537 (possibly both). 1538 1539 By default, this format uses inodes (identical nodes) when possible. 1540 We search for consecutive rows with the same nonzero structure, thereby 1541 reusing matrix information to achieve increased efficiency. 1542 1543 Options Database Keys: 1544 $ -mat_aij_no_inode - Do not use inodes 1545 $ -mat_aij_inode_limit <limit> - Set inode limit. 1546 $ (max limit=5) 1547 $ -mat_aij_oneindex - Internally use indexing starting at 1 1548 $ rather than 0. Note: When calling MatSetValues(), 1549 $ the user still MUST index entries starting at 0! 1550 1551 Storage Information: 1552 For a square global matrix we define each processor's diagonal portion 1553 to be its local rows and the corresponding columns (a square submatrix); 1554 each processor's off-diagonal portion encompasses the remainder of the 1555 local matrix (a rectangular submatrix). 1556 1557 The user can specify preallocated storage for the diagonal part of 1558 the local submatrix with either d_nz or d_nnz (not both). Set 1559 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1560 memory allocation. Likewise, specify preallocated storage for the 1561 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1562 1563 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1564 the figure below we depict these three local rows and all columns (0-11). 1565 1566 $ 0 1 2 3 4 5 6 7 8 9 10 11 1567 $ ------------------- 1568 $ row 3 | o o o d d d o o o o o o 1569 $ row 4 | o o o d d d o o o o o o 1570 $ row 5 | o o o d d d o o o o o o 1571 $ ------------------- 1572 $ 1573 1574 Thus, any entries in the d locations are stored in the d (diagonal) 1575 submatrix, and any entries in the o locations are stored in the 1576 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1577 stored simply in the MATSEQAIJ format for compressed row storage. 1578 1579 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1580 and o_nz should indicate the number of nonzeros per row in the o matrix. 1581 In general, for PDE problems in which most nonzeros are near the diagonal, 1582 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1583 or you will get TERRIBLE performance; see the users' manual chapter on 1584 matrices. 1585 1586 .keywords: matrix, aij, compressed row, sparse, parallel 1587 1588 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1589 @*/ 1590 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1591 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1592 { 1593 Mat B; 1594 Mat_MPIAIJ *b; 1595 int ierr, i,sum[2],work[2],size; 1596 1597 PetscFunctionBegin; 1598 MPI_Comm_size(comm,&size); 1599 if (size == 1) { 1600 if (M == PETSC_DECIDE) M = m; 1601 if (N == PETSC_DECIDE) N = n; 1602 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 *A = 0; 1607 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1608 PLogObjectCreate(B); 1609 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1610 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1611 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1612 B->destroy = MatDestroy_MPIAIJ; 1613 B->view = MatView_MPIAIJ; 1614 B->factor = 0; 1615 B->assembled = PETSC_FALSE; 1616 B->mapping = 0; 1617 1618 B->insertmode = NOT_SET_VALUES; 1619 b->size = size; 1620 MPI_Comm_rank(comm,&b->rank); 1621 1622 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1623 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1624 } 1625 1626 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1627 work[0] = m; work[1] = n; 1628 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1629 if (M == PETSC_DECIDE) M = sum[0]; 1630 if (N == PETSC_DECIDE) N = sum[1]; 1631 } 1632 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1633 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1634 b->m = m; B->m = m; 1635 b->n = n; B->n = n; 1636 b->N = N; B->N = N; 1637 b->M = M; B->M = M; 1638 1639 /* build local table of row and column ownerships */ 1640 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1641 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1642 b->cowners = b->rowners + b->size + 2; 1643 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1644 b->rowners[0] = 0; 1645 for ( i=2; i<=b->size; i++ ) { 1646 b->rowners[i] += b->rowners[i-1]; 1647 } 1648 b->rstart = b->rowners[b->rank]; 1649 b->rend = b->rowners[b->rank+1]; 1650 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1651 b->cowners[0] = 0; 1652 for ( i=2; i<=b->size; i++ ) { 1653 b->cowners[i] += b->cowners[i-1]; 1654 } 1655 b->cstart = b->cowners[b->rank]; 1656 b->cend = b->cowners[b->rank+1]; 1657 1658 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1659 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1660 PLogObjectParent(B,b->A); 1661 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1662 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1663 PLogObjectParent(B,b->B); 1664 1665 /* build cache for off array entries formed */ 1666 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1667 b->donotstash = 0; 1668 b->colmap = 0; 1669 b->garray = 0; 1670 b->roworiented = 1; 1671 1672 /* stuff used for matrix vector multiply */ 1673 b->lvec = 0; 1674 b->Mvctx = 0; 1675 1676 /* stuff for MatGetRow() */ 1677 b->rowindices = 0; 1678 b->rowvalues = 0; 1679 b->getrowactive = PETSC_FALSE; 1680 1681 *A = B; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNC__ 1686 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1687 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1688 { 1689 Mat mat; 1690 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1691 int ierr, len=0, flg; 1692 1693 PetscFunctionBegin; 1694 *newmat = 0; 1695 PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1696 PLogObjectCreate(mat); 1697 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1698 PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps)); 1699 mat->destroy = MatDestroy_MPIAIJ; 1700 mat->view = MatView_MPIAIJ; 1701 mat->factor = matin->factor; 1702 mat->assembled = PETSC_TRUE; 1703 1704 a->m = mat->m = oldmat->m; 1705 a->n = mat->n = oldmat->n; 1706 a->M = mat->M = oldmat->M; 1707 a->N = mat->N = oldmat->N; 1708 1709 a->rstart = oldmat->rstart; 1710 a->rend = oldmat->rend; 1711 a->cstart = oldmat->cstart; 1712 a->cend = oldmat->cend; 1713 a->size = oldmat->size; 1714 a->rank = oldmat->rank; 1715 mat->insertmode = NOT_SET_VALUES; 1716 a->rowvalues = 0; 1717 a->getrowactive = PETSC_FALSE; 1718 1719 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1720 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1721 a->cowners = a->rowners + a->size + 2; 1722 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1723 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1724 if (oldmat->colmap) { 1725 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1726 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1727 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1728 } else a->colmap = 0; 1729 if (oldmat->garray) { 1730 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1731 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1732 PLogObjectMemory(mat,len*sizeof(int)); 1733 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1734 } else a->garray = 0; 1735 1736 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1737 PLogObjectParent(mat,a->lvec); 1738 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1739 PLogObjectParent(mat,a->Mvctx); 1740 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1741 PLogObjectParent(mat,a->A); 1742 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1743 PLogObjectParent(mat,a->B); 1744 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1745 if (flg) { 1746 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1747 } 1748 *newmat = mat; 1749 PetscFunctionReturn(0); 1750 } 1751 1752 #include "sys.h" 1753 1754 #undef __FUNC__ 1755 #define __FUNC__ "MatLoad_MPIAIJ" 1756 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1757 { 1758 Mat A; 1759 Scalar *vals,*svals; 1760 MPI_Comm comm = ((PetscObject)viewer)->comm; 1761 MPI_Status status; 1762 int i, nz, ierr, j,rstart, rend, fd; 1763 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1764 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1765 int tag = ((PetscObject)viewer)->tag; 1766 1767 PetscFunctionBegin; 1768 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1769 if (!rank) { 1770 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1771 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1772 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object"); 1773 if (header[3] < 0) { 1774 SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 1775 } 1776 } 1777 1778 1779 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1780 M = header[1]; N = header[2]; 1781 /* determine ownership of all rows */ 1782 m = M/size + ((M % size) > rank); 1783 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1784 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1785 rowners[0] = 0; 1786 for ( i=2; i<=size; i++ ) { 1787 rowners[i] += rowners[i-1]; 1788 } 1789 rstart = rowners[rank]; 1790 rend = rowners[rank+1]; 1791 1792 /* distribute row lengths to all processors */ 1793 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1794 offlens = ourlens + (rend-rstart); 1795 if (!rank) { 1796 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1797 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1798 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1799 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1800 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1801 PetscFree(sndcounts); 1802 } else { 1803 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1804 } 1805 1806 if (!rank) { 1807 /* calculate the number of nonzeros on each processor */ 1808 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1809 PetscMemzero(procsnz,size*sizeof(int)); 1810 for ( i=0; i<size; i++ ) { 1811 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1812 procsnz[i] += rowlengths[j]; 1813 } 1814 } 1815 PetscFree(rowlengths); 1816 1817 /* determine max buffer needed and allocate it */ 1818 maxnz = 0; 1819 for ( i=0; i<size; i++ ) { 1820 maxnz = PetscMax(maxnz,procsnz[i]); 1821 } 1822 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1823 1824 /* read in my part of the matrix column indices */ 1825 nz = procsnz[0]; 1826 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1827 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1828 1829 /* read in every one elses and ship off */ 1830 for ( i=1; i<size; i++ ) { 1831 nz = procsnz[i]; 1832 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1833 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1834 } 1835 PetscFree(cols); 1836 } else { 1837 /* determine buffer space needed for message */ 1838 nz = 0; 1839 for ( i=0; i<m; i++ ) { 1840 nz += ourlens[i]; 1841 } 1842 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1843 1844 /* receive message of column indices*/ 1845 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1846 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1847 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1848 } 1849 1850 /* loop over local rows, determining number of off diagonal entries */ 1851 PetscMemzero(offlens,m*sizeof(int)); 1852 jj = 0; 1853 for ( i=0; i<m; i++ ) { 1854 for ( j=0; j<ourlens[i]; j++ ) { 1855 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1856 jj++; 1857 } 1858 } 1859 1860 /* create our matrix */ 1861 for ( i=0; i<m; i++ ) { 1862 ourlens[i] -= offlens[i]; 1863 } 1864 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1865 A = *newmat; 1866 MatSetOption(A,MAT_COLUMNS_SORTED); 1867 for ( i=0; i<m; i++ ) { 1868 ourlens[i] += offlens[i]; 1869 } 1870 1871 if (!rank) { 1872 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1873 1874 /* read in my part of the matrix numerical values */ 1875 nz = procsnz[0]; 1876 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1877 1878 /* insert into matrix */ 1879 jj = rstart; 1880 smycols = mycols; 1881 svals = vals; 1882 for ( i=0; i<m; i++ ) { 1883 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1884 smycols += ourlens[i]; 1885 svals += ourlens[i]; 1886 jj++; 1887 } 1888 1889 /* read in other processors and ship out */ 1890 for ( i=1; i<size; i++ ) { 1891 nz = procsnz[i]; 1892 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1893 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1894 } 1895 PetscFree(procsnz); 1896 } else { 1897 /* receive numeric values */ 1898 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1899 1900 /* receive message of values*/ 1901 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1902 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1903 if (maxnz != nz) SETERRQ(1,0,"something is wrong with file"); 1904 1905 /* insert into matrix */ 1906 jj = rstart; 1907 smycols = mycols; 1908 svals = vals; 1909 for ( i=0; i<m; i++ ) { 1910 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1911 smycols += ourlens[i]; 1912 svals += ourlens[i]; 1913 jj++; 1914 } 1915 } 1916 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1917 1918 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1919 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 #undef __FUNC__ 1924 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1925 /* 1926 Not great since it makes two copies of the submatrix, first an SeqAIJ 1927 in local and then by concatenating the local matrices the end result. 1928 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1929 */ 1930 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat) 1931 { 1932 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1933 Mat *local,M; 1934 Scalar *vwork,*aa; 1935 MPI_Comm comm = mat->comm; 1936 Mat_SeqAIJ *aij; 1937 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1938 1939 PetscFunctionBegin; 1940 MPI_Comm_rank(comm,&rank); 1941 MPI_Comm_size(comm,&size); 1942 1943 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1944 1945 /* 1946 m - number of local rows 1947 n - number of columns (same on all processors) 1948 rstart - first row in new global matrix generated 1949 */ 1950 ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr); 1951 if (call == MAT_INITIAL_MATRIX) { 1952 aij = (Mat_SeqAIJ *) (*local)->data; 1953 if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix"); 1954 ii = aij->i; 1955 jj = aij->j; 1956 1957 /* 1958 Determine the number of non-zeros in the diagonal and off-diagonal 1959 portions of the matrix in order to do correct preallocation 1960 */ 1961 1962 /* first get start and end of "diagonal" columns */ 1963 nlocal = n/size + ((n % size) > rank); 1964 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1965 rstart = rend - nlocal; 1966 1967 /* next, compute all the lengths */ 1968 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 1969 olens = dlens + m; 1970 for ( i=0; i<m; i++ ) { 1971 jend = ii[i+1] - ii[i]; 1972 olen = 0; 1973 dlen = 0; 1974 for ( j=0; j<jend; j++ ) { 1975 if ( *jj < rstart || *jj >= rend) olen++; 1976 else dlen++; 1977 jj++; 1978 } 1979 olens[i] = olen; 1980 dlens[i] = dlen; 1981 } 1982 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1983 PetscFree(dlens); 1984 } else { 1985 int ml,nl; 1986 1987 M = *newmat; 1988 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1989 if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request"); 1990 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1991 } 1992 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 1993 aij = (Mat_SeqAIJ *) (*local)->data; 1994 if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix"); 1995 ii = aij->i; 1996 jj = aij->j; 1997 aa = aij->a; 1998 for (i=0; i<m; i++) { 1999 row = rstart + i; 2000 nz = ii[i+1] - ii[i]; 2001 cwork = jj; jj += nz; 2002 vwork = aa; aa += nz; 2003 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2004 } 2005 2006 ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr); 2007 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2008 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2009 *newmat = M; 2010 PetscFunctionReturn(0); 2011 } 2012