1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.223 1997/10/29 14:07:25 bsmith Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 PetscFunctionBegin; 24 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 25 PLogObjectMemory(mat,aij->N*sizeof(int)); 26 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 27 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 28 PetscFunctionReturn(0); 29 } 30 31 extern int DisAssemble_MPIAIJ(Mat); 32 33 #define CHUNKSIZE 15 34 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 35 { \ 36 \ 37 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 38 rmax = aimax[row]; nrow = ailen[row]; \ 39 col1 = col - shift; \ 40 \ 41 low = 0; high = nrow; \ 42 while (high-low > 5) { \ 43 t = (low+high)/2; \ 44 if (rp[t] > col) high = t; \ 45 else low = t; \ 46 } \ 47 for ( _i=0; _i<nrow; _i++ ) { \ 48 if (rp[_i] > col1) break; \ 49 if (rp[_i] == col1) { \ 50 if (addv == ADD_VALUES) ap[_i] += value; \ 51 else ap[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (nonew == 1) goto a_noinsert; \ 56 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 57 if (nrow >= rmax) { \ 58 /* there is no extra room in row, therefore enlarge */ \ 59 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 60 Scalar *new_a; \ 61 \ 62 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 63 \ 64 /* malloc new storage space */ \ 65 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 66 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 67 new_j = (int *) (new_a + new_nz); \ 68 new_i = new_j + new_nz; \ 69 \ 70 /* copy over old data into new slots */ \ 71 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 72 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 73 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 74 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 75 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 76 len*sizeof(int)); \ 77 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 78 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 79 len*sizeof(Scalar)); \ 80 /* free up old matrix storage */ \ 81 \ 82 PetscFree(a->a); \ 83 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 84 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 85 a->singlemalloc = 1; \ 86 \ 87 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 88 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 89 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 90 a->maxnz += CHUNKSIZE; \ 91 a->reallocs++; \ 92 } \ 93 N = nrow++ - 1; a->nz++; \ 94 /* shift up all the later entries in this row */ \ 95 for ( ii=N; ii>=_i; ii-- ) { \ 96 rp[ii+1] = rp[ii]; \ 97 ap[ii+1] = ap[ii]; \ 98 } \ 99 rp[_i] = col1; \ 100 ap[_i] = value; \ 101 a_noinsert: ; \ 102 ailen[row] = nrow; \ 103 } 104 
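/*
   MatSetValues_SeqAIJ_A_Private() above and MatSetValues_SeqAIJ_B_Private()
   below insert a single (row,col,value) entry into the local diagonal ("A")
   and off-diagonal ("B") SeqAIJ blocks respectively.  Each locates the column
   with a coarse binary search followed by a linear scan, adds to or overwrites
   an existing entry, and otherwise opens a slot in the row, reallocating the
   (i,j,a) storage with CHUNKSIZE extra entries when the row is already full.
   They serve as hand-inlined versions of MatSetValues_SeqAIJ() for the inner
   loop of MatSetValues_MPIAIJ() below (see the commented-out calls there).
*/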
105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 106 { \ 107 \ 108 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 109 rmax = bimax[row]; nrow = bilen[row]; \ 110 col1 = col - shift; \ 111 \ 112 low = 0; high = nrow; \ 113 while (high-low > 5) { \ 114 t = (low+high)/2; \ 115 if (rp[t] > col) high = t; \ 116 else low = t; \ 117 } \ 118 for ( _i=0; _i<nrow; _i++ ) { \ 119 if (rp[_i] > col1) break; \ 120 if (rp[_i] == col1) { \ 121 if (addv == ADD_VALUES) ap[_i] += value; \ 122 else ap[_i] = value; \ 123 goto b_noinsert; \ 124 } \ 125 } \ 126 if (nonew == 1) goto b_noinsert; \ 127 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 128 if (nrow >= rmax) { \ 129 /* there is no extra room in row, therefore enlarge */ \ 130 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 131 Scalar *new_a; \ 132 \ 133 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 134 \ 135 /* malloc new storage space */ \ 136 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 137 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 138 new_j = (int *) (new_a + new_nz); \ 139 new_i = new_j + new_nz; \ 140 \ 141 /* copy over old data into new slots */ \ 142 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 143 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 144 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 145 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 146 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 147 len*sizeof(int)); \ 148 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 149 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 150 len*sizeof(Scalar)); \ 151 /* free up old matrix storage */ \ 152 \ 153 PetscFree(b->a); \ 154 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 155 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 156 b->singlemalloc = 1; \ 157 \ 158 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 159 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 160 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 161 b->maxnz += CHUNKSIZE; \ 162 b->reallocs++; \ 163 } \ 164 N = nrow++ - 1; b->nz++; \ 165 /* shift up all the later entries in this row */ \ 166 for ( ii=N; ii>=_i; ii-- ) { \ 167 rp[ii+1] = rp[ii]; \ 168 ap[ii+1] = ap[ii]; \ 169 } \ 170 rp[_i] = col1; \ 171 ap[_i] = value; \ 172 b_noinsert: ; \ 173 bilen[row] = nrow; \ 174 } 175 176 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 177 #undef __FUNC__ 178 #define __FUNC__ "MatSetValues_MPIAIJ" 179 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 180 { 181 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 182 Scalar value; 183 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 184 int cstart = aij->cstart, cend = aij->cend,row,col; 185 int roworiented = aij->roworiented; 186 187 /* Some Variables required in the macro */ 188 Mat A = aij->A; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 190 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 191 Scalar *aa = a->a; 192 193 Mat B = aij->B; 194 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 195 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 196 Scalar *ba = b->a; 197 198 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 199 int nonew = a->nonew,shift = a->indexshift; 200 Scalar *ap; 201 202 PetscFunctionBegin; 203 for ( i=0; i<m; i++ ) { 204 #if defined(USE_PETSC_BOPT_g) 205 if (im[i] < 0) 
SETERRQ(1,0,"Negative row"); 206 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 207 #endif 208 if (im[i] >= rstart && im[i] < rend) { 209 row = im[i] - rstart; 210 for ( j=0; j<n; j++ ) { 211 if (in[j] >= cstart && in[j] < cend){ 212 col = in[j] - cstart; 213 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 214 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 215 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 216 } 217 #if defined(USE_PETSC_BOPT_g) 218 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 219 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 220 #endif 221 else { 222 if (mat->was_assembled) { 223 if (!aij->colmap) { 224 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 225 } 226 col = aij->colmap[in[j]] - 1; 227 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 228 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 229 col = in[j]; 230 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 231 B = aij->B; 232 b = (Mat_SeqAIJ *) B->data; 233 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 234 ba = b->a; 235 } 236 } 237 else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } 294 else { 295 SETERRQ(1,0,"Only local values currently supported"); 296 } 297 } 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 PetscFunctionBegin; 315 /* make 
sure all processors are either in INSERTMODE or ADDMODE */ 316 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 317 if (addv == (ADD_VALUES|INSERT_VALUES)) { 318 SETERRQ(1,0,"Some processors inserted others added"); 319 } 320 mat->insertmode = addv; /* in case this processor had no cache */ 321 322 /* first count number of contributors to each processor */ 323 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 324 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 325 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 326 for ( i=0; i<aij->stash.n; i++ ) { 327 idx = aij->stash.idx[i]; 328 for ( j=0; j<size; j++ ) { 329 if (idx >= owners[j] && idx < owners[j+1]) { 330 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 331 } 332 } 333 } 334 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 335 336 /* inform other processors of number of messages and max length*/ 337 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 338 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 339 nreceives = work[rank]; 340 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 341 nmax = work[rank]; 342 PetscFree(work); 343 344 /* post receives: 345 1) each message will consist of ordered pairs 346 (global index,value) we store the global index as a double 347 to simplify the message passing. 348 2) since we don't know how long each individual message is we 349 allocate the largest needed buffer for each receive. Potentially 350 this is a lot of wasted space. 351 352 353 This could be done better. 354 */ 355 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 357 for ( i=0; i<nreceives; i++ ) { 358 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 359 comm,recv_waits+i);CHKERRQ(ierr); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 368 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 369 starts[0] = 0; 370 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 371 for ( i=0; i<aij->stash.n; i++ ) { 372 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 373 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 374 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 375 } 376 PetscFree(owner); 377 starts[0] = 0; 378 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 PetscFunctionReturn(0); 397 } 398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 
#define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407 int row,col,other_disassembled; 408 Scalar *values,val; 409 InsertMode addv = mat->insertmode; 410 411 PetscFunctionBegin; 412 /* wait on receives */ 413 while (count) { 414 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 415 /* unpack receives into our local space */ 416 values = aij->rvalues + 3*imdex*aij->rmax; 417 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 418 n = n/3; 419 for ( i=0; i<n; i++ ) { 420 row = (int) PetscReal(values[3*i]) - aij->rstart; 421 col = (int) PetscReal(values[3*i+1]); 422 val = values[3*i+2]; 423 if (col >= aij->cstart && col < aij->cend) { 424 col -= aij->cstart; 425 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 426 } else { 427 if (mat->was_assembled || mat->assembled) { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430 } 431 col = aij->colmap[col] - 1; 432 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 433 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434 col = (int) PetscReal(values[3*i+1]); 435 } 436 } 437 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 438 } 439 } 440 count--; 441 } 442 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 443 444 /* wait on sends */ 445 if (aij->nsends) { 446 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 448 PetscFree(send_status); 449 } 450 PetscFree(aij->send_waits); PetscFree(aij->svalues); 451 452 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 454 455 /* determine if any processor has disassembled, if so we must 456 also disassemble ourselfs, in order that we may reassemble. */ 457 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 458 if (mat->was_assembled && !other_disassembled) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 } 461 462 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 463 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 467 468 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNC__ 473 #define __FUNC__ "MatZeroEntries_MPIAIJ" 474 int MatZeroEntries_MPIAIJ(Mat A) 475 { 476 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 479 PetscFunctionBegin; 480 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 481 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 /* the code does not do the diagonal entries correctly unless the 486 matrix is square and the column and row owerships are identical. 487 This is a BUG. The only way to fix it seems to be to access 488 aij->A and aij->B directly and not through the MatZeroRows() 489 routine. 
490 */ 491 #undef __FUNC__ 492 #define __FUNC__ "MatZeroRows_MPIAIJ" 493 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 494 { 495 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 496 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 497 int *procs,*nprocs,j,found,idx,nsends,*work; 498 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 499 int *rvalues,tag = A->tag,count,base,slen,n,*source; 500 int *lens,imdex,*lrows,*values; 501 MPI_Comm comm = A->comm; 502 MPI_Request *send_waits,*recv_waits; 503 MPI_Status recv_status,*send_status; 504 IS istmp; 505 506 PetscFunctionBegin; 507 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 508 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 509 510 /* first count number of contributors to each processor */ 511 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 512 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 513 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 514 for ( i=0; i<N; i++ ) { 515 idx = rows[i]; 516 found = 0; 517 for ( j=0; j<size; j++ ) { 518 if (idx >= owners[j] && idx < owners[j+1]) { 519 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 520 } 521 } 522 if (!found) SETERRQ(1,0,"Index out of range"); 523 } 524 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 525 526 /* inform other processors of number of messages and max length*/ 527 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 528 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 529 nrecvs = work[rank]; 530 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 531 nmax = work[rank]; 532 PetscFree(work); 533 534 /* post receives: */ 535 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 536 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 537 for ( i=0; i<nrecvs; i++ ) { 538 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 539 } 540 541 /* do sends: 542 1) starts[i] gives the starting index in svalues for stuff going to 543 the ith processor 544 */ 545 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 546 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 547 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 548 starts[0] = 0; 549 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 550 for ( i=0; i<N; i++ ) { 551 svalues[starts[owner[i]]++] = rows[i]; 552 } 553 ISRestoreIndices(is,&rows); 554 555 starts[0] = 0; 556 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 557 count = 0; 558 for ( i=0; i<size; i++ ) { 559 if (procs[i]) { 560 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 561 } 562 } 563 PetscFree(starts); 564 565 base = owners[rank]; 566 567 /* wait on receives */ 568 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 569 source = lens + nrecvs; 570 count = nrecvs; slen = 0; 571 while (count) { 572 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 573 /* unpack receives into our local space */ 574 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 575 source[imdex] = recv_status.MPI_SOURCE; 576 lens[imdex] = n; 577 slen += n; 578 count--; 579 } 580 PetscFree(recv_waits); 581 582 /* move the data into the send scatter */ 583 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 
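  /* concatenate the row indices received from each sender, converting them
     from global numbering to this processor's local numbering using base */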
584 count = 0; 585 for ( i=0; i<nrecvs; i++ ) { 586 values = rvalues + i*nmax; 587 for ( j=0; j<lens[i]; j++ ) { 588 lrows[count++] = values[j] - base; 589 } 590 } 591 PetscFree(rvalues); PetscFree(lens); 592 PetscFree(owner); PetscFree(nprocs); 593 594 /* actually zap the local rows */ 595 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 596 PLogObjectParent(A,istmp); 597 PetscFree(lrows); 598 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 599 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 600 ierr = ISDestroy(istmp); CHKERRQ(ierr); 601 602 /* wait on sends */ 603 if (nsends) { 604 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 605 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 606 PetscFree(send_status); 607 } 608 PetscFree(send_waits); PetscFree(svalues); 609 610 PetscFunctionReturn(0); 611 } 612 613 #undef __FUNC__ 614 #define __FUNC__ "MatMult_MPIAIJ" 615 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 616 { 617 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 618 int ierr,nt; 619 620 PetscFunctionBegin; 621 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 622 if (nt != a->n) { 623 SETERRQ(1,0,"Incompatible partition of A and xx"); 624 } 625 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 626 ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr); 627 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 628 ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 #undef __FUNC__ 633 #define __FUNC__ "MatMultAdd_MPIAIJ" 634 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 635 { 636 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 637 int ierr; 638 639 PetscFunctionBegin; 640 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 641 ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 642 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 643 ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNC__ 648 #define __FUNC__ "MatMultTrans_MPIAIJ" 649 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 650 { 651 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 652 int ierr; 653 654 PetscFunctionBegin; 655 /* do nondiagonal part */ 656 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 657 /* send it on its way */ 658 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 659 /* do local part */ 660 ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr); 661 /* receive remote parts: note this assumes the values are not actually */ 662 /* inserted in yy until the next line, which is true for my implementation*/ 663 /* but is not perhaps always true. 
*/ 664 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 670 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 671 { 672 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 673 int ierr; 674 675 PetscFunctionBegin; 676 /* do nondiagonal part */ 677 ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 678 /* send it on its way */ 679 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 680 /* do local part */ 681 ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 682 /* receive remote parts: note this assumes the values are not actually */ 683 /* inserted in yy until the next line, which is true for my implementation*/ 684 /* but is not perhaps always true. */ 685 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 /* 690 This only works correctly for square matrices where the subblock A->A is the 691 diagonal block 692 */ 693 #undef __FUNC__ 694 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 695 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 696 { 697 int ierr; 698 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 699 700 PetscFunctionBegin; 701 if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block"); 702 if (a->rstart != a->cstart || a->rend != a->cend) { 703 SETERRQ(1,0,"row partition must equal col partition"); 704 } 705 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNC__ 710 #define __FUNC__ "MatScale_MPIAIJ" 711 int MatScale_MPIAIJ(Scalar *aa,Mat A) 712 { 713 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 714 int ierr; 715 716 PetscFunctionBegin; 717 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 718 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 719 PetscFunctionReturn(0); 720 } 721 722 #undef __FUNC__ 723 #define __FUNC__ "MatDestroy_MPIAIJ" 724 int MatDestroy_MPIAIJ(PetscObject obj) 725 { 726 Mat mat = (Mat) obj; 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 728 int ierr; 729 730 PetscFunctionBegin; 731 #if defined(USE_PETSC_LOG) 732 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 733 #endif 734 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 735 PetscFree(aij->rowners); 736 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 738 if (aij->colmap) PetscFree(aij->colmap); 739 if (aij->garray) PetscFree(aij->garray); 740 if (aij->lvec) VecDestroy(aij->lvec); 741 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 742 if (aij->rowvalues) PetscFree(aij->rowvalues); 743 PetscFree(aij); 744 PLogObjectDestroy(mat); 745 PetscHeaderDestroy(mat); 746 PetscFunctionReturn(0); 747 } 748 749 #undef __FUNC__ 750 #define __FUNC__ "MatView_MPIAIJ_Binary" 751 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 752 { 753 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 754 int ierr; 755 756 PetscFunctionBegin; 757 if (aij->size == 1) { 758 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 759 } 760 else SETERRQ(1,0,"Only uniprocessor output supported"); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNC__ 765 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 766 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 767 { 768 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 769 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 770 int ierr, format,shift = C->indexshift,rank; 771 FILE *fd; 772 ViewerType vtype; 773 774 PetscFunctionBegin; 775 ierr = 
ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 776 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 777 ierr = ViewerGetFormat(viewer,&format); 778 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 779 MatInfo info; 780 int flg; 781 MPI_Comm_rank(mat->comm,&rank); 782 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 783 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 784 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 785 PetscSequentialPhaseBegin(mat->comm,1); 786 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 787 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 788 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 789 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 790 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 791 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 792 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 793 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 794 fflush(fd); 795 PetscSequentialPhaseEnd(mat->comm,1); 796 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 799 PetscFunctionReturn(0); 800 } 801 } 802 803 if (vtype == DRAW_VIEWER) { 804 Draw draw; 805 PetscTruth isnull; 806 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 807 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 808 } 809 810 if (vtype == ASCII_FILE_VIEWER) { 811 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 812 PetscSequentialPhaseBegin(mat->comm,1); 813 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 814 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 815 aij->cend); 816 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 817 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 818 fflush(fd); 819 PetscSequentialPhaseEnd(mat->comm,1); 820 } else { 821 int size = aij->size; 822 rank = aij->rank; 823 if (size == 1) { 824 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 825 } else { 826 /* assemble the entire matrix onto first processor. 
*/ 827 Mat A; 828 Mat_SeqAIJ *Aloc; 829 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 830 Scalar *a; 831 832 if (!rank) { 833 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 834 CHKERRQ(ierr); 835 } else { 836 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A); 837 CHKERRQ(ierr); 838 } 839 PLogObjectParent(mat,A); 840 841 /* copy over the A part */ 842 Aloc = (Mat_SeqAIJ*) aij->A->data; 843 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 844 row = aij->rstart; 845 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 846 for ( i=0; i<m; i++ ) { 847 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 848 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 849 } 850 aj = Aloc->j; 851 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 852 853 /* copy over the B part */ 854 Aloc = (Mat_SeqAIJ*) aij->B->data; 855 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 856 row = aij->rstart; 857 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 858 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 859 for ( i=0; i<m; i++ ) { 860 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 861 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 862 } 863 PetscFree(ct); 864 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 865 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 866 if (!rank) { 867 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 868 } 869 ierr = MatDestroy(A); CHKERRQ(ierr); 870 } 871 } 872 PetscFunctionReturn(0); 873 } 874 875 #undef __FUNC__ 876 #define __FUNC__ "MatView_MPIAIJ" 877 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 878 { 879 Mat mat = (Mat) obj; 880 int ierr; 881 ViewerType vtype; 882 883 PetscFunctionBegin; 884 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 885 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 886 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 887 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 888 } 889 else if (vtype == BINARY_FILE_VIEWER) { 890 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 891 } 892 PetscFunctionReturn(0); 893 } 894 895 /* 896 This has to provide several versions. 897 898 2) a) use only local smoothing updating outer values only once. 899 b) local smoothing updating outer values each inner iteration 900 3) color updating out values betwen colors. 
901 */ 902 #undef __FUNC__ 903 #define __FUNC__ "MatRelax_MPIAIJ" 904 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 905 double fshift,int its,Vec xx) 906 { 907 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 908 Mat AA = mat->A, BB = mat->B; 909 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 910 Scalar *b,*x,*xs,*ls,d,*v,sum; 911 int ierr,*idx, *diag; 912 int n = mat->n, m = mat->m, i,shift = A->indexshift; 913 914 PetscFunctionBegin; 915 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 916 xs = x + shift; /* shift by one for index start of 1 */ 917 ls = ls + shift; 918 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 919 diag = A->diag; 920 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 921 if (flag & SOR_ZERO_INITIAL_GUESS) { 922 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 926 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 927 while (its--) { 928 /* go down through the rows */ 929 for ( i=0; i<m; i++ ) { 930 n = A->i[i+1] - A->i[i]; 931 idx = A->j + A->i[i] + shift; 932 v = A->a + A->i[i] + shift; 933 sum = b[i]; 934 SPARSEDENSEMDOT(sum,xs,v,idx,n); 935 d = fshift + A->a[diag[i]+shift]; 936 n = B->i[i+1] - B->i[i]; 937 idx = B->j + B->i[i] + shift; 938 v = B->a + B->i[i] + shift; 939 SPARSEDENSEMDOT(sum,ls,v,idx,n); 940 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 941 } 942 /* come up through the rows */ 943 for ( i=m-1; i>-1; i-- ) { 944 n = A->i[i+1] - A->i[i]; 945 idx = A->j + A->i[i] + shift; 946 v = A->a + A->i[i] + shift; 947 sum = b[i]; 948 SPARSEDENSEMDOT(sum,xs,v,idx,n); 949 d = fshift + A->a[diag[i]+shift]; 950 n = B->i[i+1] - B->i[i]; 951 idx = B->j + B->i[i] + shift; 952 v = B->a + B->i[i] + shift; 953 SPARSEDENSEMDOT(sum,ls,v,idx,n); 954 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 955 } 956 } 957 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 958 if (flag & SOR_ZERO_INITIAL_GUESS) { 959 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 963 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 964 while (its--) { 965 for ( i=0; i<m; i++ ) { 966 n = A->i[i+1] - A->i[i]; 967 idx = A->j + A->i[i] + shift; 968 v = A->a + A->i[i] + shift; 969 sum = b[i]; 970 SPARSEDENSEMDOT(sum,xs,v,idx,n); 971 d = fshift + A->a[diag[i]+shift]; 972 n = B->i[i+1] - B->i[i]; 973 idx = B->j + B->i[i] + shift; 974 v = B->a + B->i[i] + shift; 975 SPARSEDENSEMDOT(sum,ls,v,idx,n); 976 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 977 } 978 } 979 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 980 if (flag & SOR_ZERO_INITIAL_GUESS) { 981 ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 985 mat->Mvctx); CHKERRQ(ierr); 986 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 987 mat->Mvctx); CHKERRQ(ierr); 988 while (its--) { 989 for ( i=m-1; i>-1; i-- ) { 990 n = A->i[i+1] - A->i[i]; 991 idx = A->j + A->i[i] + shift; 992 v = A->a + A->i[i] + shift; 993 sum = b[i]; 994 SPARSEDENSEMDOT(sum,xs,v,idx,n); 995 d = fshift + A->a[diag[i]+shift]; 996 n = B->i[i+1] - B->i[i]; 997 idx = B->j + B->i[i] + shift; 998 v = B->a + B->i[i] + shift; 999 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1000 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1001 } 1002 } 1003 } else { 1004 SETERRQ(1,0,"Option not supported"); 1005 } 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNC__ 1010 #define __FUNC__ "MatGetInfo_MPIAIJ" 1011 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1012 { 1013 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1014 Mat A = mat->A, B = mat->B; 1015 int ierr; 1016 double isend[5], irecv[5]; 1017 1018 PetscFunctionBegin; 1019 info->rows_global = (double)mat->M; 1020 info->columns_global = (double)mat->N; 1021 info->rows_local = (double)mat->m; 1022 info->columns_local = (double)mat->N; 1023 info->block_size = 1.0; 1024 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1025 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1026 isend[3] = info->memory; isend[4] = info->mallocs; 1027 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1028 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1029 isend[3] += info->memory; isend[4] += info->mallocs; 1030 if (flag == MAT_LOCAL) { 1031 info->nz_used = isend[0]; 1032 info->nz_allocated = isend[1]; 1033 info->nz_unneeded = isend[2]; 1034 info->memory = isend[3]; 1035 info->mallocs = isend[4]; 1036 } else if (flag == MAT_GLOBAL_MAX) { 1037 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1038 info->nz_used = irecv[0]; 1039 info->nz_allocated = irecv[1]; 1040 info->nz_unneeded = irecv[2]; 1041 info->memory = irecv[3]; 1042 info->mallocs = irecv[4]; 1043 } else if (flag == MAT_GLOBAL_SUM) { 1044 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1045 info->nz_used = irecv[0]; 1046 info->nz_allocated = irecv[1]; 1047 info->nz_unneeded = irecv[2]; 1048 info->memory = irecv[3]; 1049 info->mallocs = irecv[4]; 1050 } 1051 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1052 info->fill_ratio_needed = 0; 1053 info->factor_mallocs = 0; 1054 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNC__ 1059 #define __FUNC__ "MatSetOption_MPIAIJ" 1060 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1061 { 1062 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1063 1064 PetscFunctionBegin; 1065 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1066 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1067 op == MAT_COLUMNS_UNSORTED || 1068 op == MAT_COLUMNS_SORTED || 1069 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1070 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1071 MatSetOption(a->A,op); 1072 MatSetOption(a->B,op); 1073 } else if (op == MAT_ROW_ORIENTED) { 1074 a->roworiented = 1; 1075 MatSetOption(a->A,op); 1076 MatSetOption(a->B,op); 1077 } else 
if (op == MAT_ROWS_SORTED || 1078 op == MAT_ROWS_UNSORTED || 1079 op == MAT_SYMMETRIC || 1080 op == MAT_STRUCTURALLY_SYMMETRIC || 1081 op == MAT_YES_NEW_DIAGONALS) 1082 PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n"); 1083 else if (op == MAT_COLUMN_ORIENTED) { 1084 a->roworiented = 0; 1085 MatSetOption(a->A,op); 1086 MatSetOption(a->B,op); 1087 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1088 a->donotstash = 1; 1089 } else if (op == MAT_NO_NEW_DIAGONALS){ 1090 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1091 } else { 1092 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNC__ 1098 #define __FUNC__ "MatGetSize_MPIAIJ" 1099 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1100 { 1101 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1102 1103 PetscFunctionBegin; 1104 if (m) *m = mat->M; 1105 if (n) *n = mat->N; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNC__ 1110 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1111 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1112 { 1113 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1114 1115 PetscFunctionBegin; 1116 if (m) *m = mat->m; 1117 if (n) *n = mat->N; 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNC__ 1122 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1123 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1124 { 1125 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1126 1127 PetscFunctionBegin; 1128 *m = mat->rstart; *n = mat->rend; 1129 PetscFunctionReturn(0); 1130 } 1131 1132 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1133 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1134 1135 #undef __FUNC__ 1136 #define __FUNC__ "MatGetRow_MPIAIJ" 1137 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1138 { 1139 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1140 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1141 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1142 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1143 int *cmap, *idx_p; 1144 1145 PetscFunctionBegin; 1146 if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active"); 1147 mat->getrowactive = PETSC_TRUE; 1148 1149 if (!mat->rowvalues && (idx || v)) { 1150 /* 1151 allocate enough space to hold information from the longest row. 
1152 */ 1153 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1154 int max = 1,m = mat->m,tmp; 1155 for ( i=0; i<m; i++ ) { 1156 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1157 if (max < tmp) { max = tmp; } 1158 } 1159 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1160 CHKPTRQ(mat->rowvalues); 1161 mat->rowindices = (int *) (mat->rowvalues + max); 1162 } 1163 1164 if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows") 1165 lrow = row - rstart; 1166 1167 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1168 if (!v) {pvA = 0; pvB = 0;} 1169 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1170 ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1171 ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1172 nztot = nzA + nzB; 1173 1174 cmap = mat->garray; 1175 if (v || idx) { 1176 if (nztot) { 1177 /* Sort by increasing column numbers, assuming A and B already sorted */ 1178 int imark = -1; 1179 if (v) { 1180 *v = v_p = mat->rowvalues; 1181 for ( i=0; i<nzB; i++ ) { 1182 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1183 else break; 1184 } 1185 imark = i; 1186 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1187 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1188 } 1189 if (idx) { 1190 *idx = idx_p = mat->rowindices; 1191 if (imark > -1) { 1192 for ( i=0; i<imark; i++ ) { 1193 idx_p[i] = cmap[cworkB[i]]; 1194 } 1195 } else { 1196 for ( i=0; i<nzB; i++ ) { 1197 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1198 else break; 1199 } 1200 imark = i; 1201 } 1202 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1203 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1204 } 1205 } 1206 else { 1207 if (idx) *idx = 0; 1208 if (v) *v = 0; 1209 } 1210 } 1211 *nz = nztot; 1212 ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1213 ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNC__ 1218 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1219 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1220 { 1221 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1222 1223 PetscFunctionBegin; 1224 if (aij->getrowactive == PETSC_FALSE) { 1225 SETERRQ(1,0,"MatGetRow not called"); 1226 } 1227 aij->getrowactive = PETSC_FALSE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNC__ 1232 #define __FUNC__ "MatNorm_MPIAIJ" 1233 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1234 { 1235 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1236 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1237 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1238 double sum = 0.0; 1239 Scalar *v; 1240 1241 PetscFunctionBegin; 1242 if (aij->size == 1) { 1243 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1244 } else { 1245 if (type == NORM_FROBENIUS) { 1246 v = amat->a; 1247 for (i=0; i<amat->nz; i++ ) { 1248 #if defined(USE_PETSC_COMPLEX) 1249 sum += real(conj(*v)*(*v)); v++; 1250 #else 1251 sum += (*v)*(*v); v++; 1252 #endif 1253 } 1254 v = bmat->a; 1255 for (i=0; i<bmat->nz; i++ ) { 1256 #if defined(USE_PETSC_COMPLEX) 1257 sum += real(conj(*v)*(*v)); v++; 1258 #else 1259 sum += (*v)*(*v); v++; 1260 #endif 1261 } 1262 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1263 *norm = sqrt(*norm); 1264 } else if (type == NORM_1) { /* max column norm */ 1265 double *tmp, *tmp2; 1266 int *jj, 
*garray = aij->garray; 1267 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1268 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1269 PetscMemzero(tmp,aij->N*sizeof(double)); 1270 *norm = 0.0; 1271 v = amat->a; jj = amat->j; 1272 for ( j=0; j<amat->nz; j++ ) { 1273 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1274 } 1275 v = bmat->a; jj = bmat->j; 1276 for ( j=0; j<bmat->nz; j++ ) { 1277 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1278 } 1279 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1280 for ( j=0; j<aij->N; j++ ) { 1281 if (tmp2[j] > *norm) *norm = tmp2[j]; 1282 } 1283 PetscFree(tmp); PetscFree(tmp2); 1284 } else if (type == NORM_INFINITY) { /* max row norm */ 1285 double ntemp = 0.0; 1286 for ( j=0; j<amat->m; j++ ) { 1287 v = amat->a + amat->i[j] + shift; 1288 sum = 0.0; 1289 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1290 sum += PetscAbsScalar(*v); v++; 1291 } 1292 v = bmat->a + bmat->i[j] + shift; 1293 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1294 sum += PetscAbsScalar(*v); v++; 1295 } 1296 if (sum > ntemp) ntemp = sum; 1297 } 1298 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1299 } else { 1300 SETERRQ(1,0,"No support for two norm"); 1301 } 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNC__ 1307 #define __FUNC__ "MatTranspose_MPIAIJ" 1308 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1309 { 1310 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1311 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1312 int ierr,shift = Aloc->indexshift; 1313 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1314 Mat B; 1315 Scalar *array; 1316 1317 PetscFunctionBegin; 1318 if (matout == PETSC_NULL && M != N) { 1319 SETERRQ(1,0,"Square matrix only for in-place"); 1320 } 1321 1322 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1323 1324 /* copy over the A part */ 1325 Aloc = (Mat_SeqAIJ*) a->A->data; 1326 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1327 row = a->rstart; 1328 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1329 for ( i=0; i<m; i++ ) { 1330 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1331 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1332 } 1333 aj = Aloc->j; 1334 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1335 1336 /* copy over the B part */ 1337 Aloc = (Mat_SeqAIJ*) a->B->data; 1338 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1339 row = a->rstart; 1340 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1341 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1342 for ( i=0; i<m; i++ ) { 1343 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1344 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1345 } 1346 PetscFree(ct); 1347 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1348 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1349 if (matout != PETSC_NULL) { 1350 *matout = B; 1351 } else { 1352 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1353 PetscFree(a->rowners); 1354 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1355 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1356 if (a->colmap) PetscFree(a->colmap); 1357 if (a->garray) PetscFree(a->garray); 1358 if (a->lvec) VecDestroy(a->lvec); 1359 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1360 PetscFree(a); 1361 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1362 PetscHeaderDestroy(B); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNC__ 1368 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1369 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1370 { 1371 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1372 Mat a = aij->A, b = aij->B; 1373 int ierr,s1,s2,s3; 1374 1375 PetscFunctionBegin; 1376 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1377 if (rr) { 1378 s3 = aij->n; 1379 VecGetLocalSize_Fast(rr,s1); 1380 if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size"); 1381 /* Overlap communication with computation. */ 1382 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1383 } 1384 if (ll) { 1385 VecGetLocalSize_Fast(ll,s1); 1386 if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size"); 1387 ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr); 1388 } 1389 /* scale the diagonal block */ 1390 ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1391 1392 if (rr) { 1393 /* Do a scatter end and then right scale the off-diagonal block */ 1394 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1395 ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1396 } 1397 1398 PetscFunctionReturn(0); 1399 } 1400 1401 1402 extern int MatPrintHelp_SeqAIJ(Mat); 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1405 int MatPrintHelp_MPIAIJ(Mat A) 1406 { 1407 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1408 int ierr; 1409 1410 PetscFunctionBegin; 1411 if (!a->rank) { 1412 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1413 } 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNC__ 1418 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1419 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1420 { 1421 PetscFunctionBegin; 1422 *bs = 1; 1423 PetscFunctionReturn(0); 1424 } 1425 #undef __FUNC__ 1426 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1427 int MatSetUnfactored_MPIAIJ(Mat A) 1428 { 1429 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1430 int ierr; 1431 1432 PetscFunctionBegin; 1433 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNC__ 1438 #define __FUNC__ "MatEqual_MPIAIJ" 1439 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1440 { 1441 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1442 Mat a, b, c, d; 1443 PetscTruth flg; 1444 int ierr; 1445 1446 PetscFunctionBegin; 1447 if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type"); 1448 a = matA->A; b = matA->B; 1449 c = matB->A; d = matB->B; 1450 1451 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1452 if (flg == PETSC_TRUE) { 1453 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1454 } 1455 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1460 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1461 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1462 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1463 extern int 
MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each
           row) or PETSC_NULL.  You must leave room for the diagonal entry even
           if it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.
   Options Database Keys:
$    -mat_aij_no_inode - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$             0 1 2 3 4 5 6 7 8 9 10 11
$            -------------------------
$     row 3  | o o o d d d o o o o o o
$     row 4  | o o o d d d o o o o o o
$     row 5  | o o o d d d o o o o o o
$            -------------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
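
   Example:
   As a rough, illustrative sketch (the row count and nonzero estimates below
   are made up, not taken from any particular application), a processor owning
   1000 rows with about 5 nonzeros per row in its diagonal block and 2 in its
   off-diagonal block could preallocate with

$       ierr = MatCreateMPIAIJ(comm,1000,1000,PETSC_DECIDE,PETSC_DECIDE,
$                              5,PETSC_NULL,2,PETSC_NULL,&A);

   Exact per-row counts can be supplied instead through the d_nnz and o_nnz
   arrays, which generally gives the best assembly performance.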
1582 1583 .keywords: matrix, aij, compressed row, sparse, parallel 1584 1585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1586 @*/ 1587 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1588 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1589 { 1590 Mat B; 1591 Mat_MPIAIJ *b; 1592 int ierr, i,sum[2],work[2],size; 1593 1594 PetscFunctionBegin; 1595 MPI_Comm_size(comm,&size); 1596 if (size == 1) { 1597 if (M == PETSC_DECIDE) M = m; 1598 if (N == PETSC_DECIDE) N = n; 1599 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1600 PetscFunctionReturn(0); 1601 } 1602 1603 *A = 0; 1604 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1605 PLogObjectCreate(B); 1606 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1607 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1608 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1609 B->destroy = MatDestroy_MPIAIJ; 1610 B->view = MatView_MPIAIJ; 1611 B->factor = 0; 1612 B->assembled = PETSC_FALSE; 1613 B->mapping = 0; 1614 1615 B->insertmode = NOT_SET_VALUES; 1616 b->size = size; 1617 MPI_Comm_rank(comm,&b->rank); 1618 1619 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1620 SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1621 } 1622 1623 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1624 work[0] = m; work[1] = n; 1625 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1626 if (M == PETSC_DECIDE) M = sum[0]; 1627 if (N == PETSC_DECIDE) N = sum[1]; 1628 } 1629 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1630 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1631 b->m = m; B->m = m; 1632 b->n = n; B->n = n; 1633 b->N = N; B->N = N; 1634 b->M = M; B->M = M; 1635 1636 /* build local table of row and column ownerships */ 1637 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1638 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1639 b->cowners = b->rowners + b->size + 2; 1640 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1641 b->rowners[0] = 0; 1642 for ( i=2; i<=b->size; i++ ) { 1643 b->rowners[i] += b->rowners[i-1]; 1644 } 1645 b->rstart = b->rowners[b->rank]; 1646 b->rend = b->rowners[b->rank+1]; 1647 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1648 b->cowners[0] = 0; 1649 for ( i=2; i<=b->size; i++ ) { 1650 b->cowners[i] += b->cowners[i-1]; 1651 } 1652 b->cstart = b->cowners[b->rank]; 1653 b->cend = b->cowners[b->rank+1]; 1654 1655 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1656 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1657 PLogObjectParent(B,b->A); 1658 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1659 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1660 PLogObjectParent(B,b->B); 1661 1662 /* build cache for off array entries formed */ 1663 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1664 b->donotstash = 0; 1665 b->colmap = 0; 1666 b->garray = 0; 1667 b->roworiented = 1; 1668 1669 /* stuff used for matrix vector multiply */ 1670 b->lvec = 0; 1671 b->Mvctx = 0; 1672 1673 /* stuff for MatGetRow() */ 1674 b->rowindices = 0; 1675 b->rowvalues = 0; 1676 b->getrowactive = PETSC_FALSE; 1677 1678 *A = B; 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNC__ 1683 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1684 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1685 { 1686 Mat mat; 

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}
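
/*
    Usage sketch (an added note; the call below is an assumption based on how
    MatConvert() is used elsewhere in this file): MatConvertSameType_MPIAIJ()
    is not called directly by application code but is reached through the
    generic interface, e.g.

       ierr = MatConvert(A,MATSAME,&B); CHKERRQ(ierr);

    which builds a new MPIAIJ matrix B with the same parallel layout as A.
*/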

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }
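
  /*
     Added note: ourlens[] was temporarily reduced to the diagonal-block
     counts so that MatCreateMPIAIJ() above could preallocate the d and o
     blocks exactly; it has now been restored to the full row lengths for
     the MatSetValues() loops below.
  */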

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numerical values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
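
/*
    Usage sketch (an added illustration; the viewer-opening and destroy calls
    are assumptions about the binary viewer interface, not taken from this
    file):

       Viewer viewer;
       Mat    A;
       ierr = ViewerFileOpenBinary(comm,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/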

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
    holding each processor's local piece, and then the final parallel matrix
    formed by concatenating those local pieces.  Writing it directly would be
    much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);

  /*
      m      - number of local rows
      n      - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (*local)->data;
    if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    nlocal = n/size + ((n % size) > rank);
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (*local)->data;
  if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;
  PetscFunctionReturn(0);
}
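
/*
    Usage sketch for the routine above (an added illustration; it assumes the
    public MatGetSubMatrix() mirrors the calling sequence shown above, and
    mlocal, first and ncols are placeholder values; ISCreateStride() is shown
    only as one way to build the index sets):

       IS  isrow, iscol;
       Mat sub;
       ierr = ISCreateStride(comm,mlocal,first,1,&isrow); CHKERRQ(ierr);
       ierr = ISCreateStride(comm,ncols,0,1,&iscol); CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
       ierr = ISDestroy(isrow); CHKERRQ(ierr);
       ierr = ISDestroy(iscol); CHKERRQ(ierr);
*/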