1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.222 1997/10/28 14:22:37 bsmith Exp bsmith $"; 3 #endif 4 5 #include "pinclude/pviewer.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #include "src/vec/vecimpl.h" 8 #include "src/inline/spops.h" 9 10 /* local utility routine that creates a mapping from the global column 11 number to the local number in the off-diagonal part of the local 12 storage of the matrix. This is done in a non scable way since the 13 length of colmap equals the global matrix length. 14 */ 15 #undef __FUNC__ 16 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 17 int CreateColmap_MPIAIJ_Private(Mat mat) 18 { 19 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 20 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 21 int n = B->n,i; 22 23 PetscFunctionBegin; 24 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 25 PLogObjectMemory(mat,aij->N*sizeof(int)); 26 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 27 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 28 PetscFunctionReturn(0); 29 } 30 31 extern int DisAssemble_MPIAIJ(Mat); 32 33 #define CHUNKSIZE 15 34 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 35 { \ 36 \ 37 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 38 rmax = aimax[row]; nrow = ailen[row]; \ 39 col1 = col - shift; \ 40 \ 41 low = 0; high = nrow; \ 42 while (high-low > 5) { \ 43 t = (low+high)/2; \ 44 if (rp[t] > col) high = t; \ 45 else low = t; \ 46 } \ 47 for ( _i=0; _i<nrow; _i++ ) { \ 48 if (rp[_i] > col1) break; \ 49 if (rp[_i] == col1) { \ 50 if (addv == ADD_VALUES) ap[_i] += value; \ 51 else ap[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (nonew == 1) goto a_noinsert; \ 56 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 57 if (nrow >= rmax) { \ 58 /* there is no extra room in row, therefore enlarge */ \ 59 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 60 Scalar *new_a; \ 61 \ 62 if (nonew == -2) 
SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 63 \ 64 /* malloc new storage space */ \ 65 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 66 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 67 new_j = (int *) (new_a + new_nz); \ 68 new_i = new_j + new_nz; \ 69 \ 70 /* copy over old data into new slots */ \ 71 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 72 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 73 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 74 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 75 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 76 len*sizeof(int)); \ 77 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 78 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 79 len*sizeof(Scalar)); \ 80 /* free up old matrix storage */ \ 81 \ 82 PetscFree(a->a); \ 83 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 84 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 85 a->singlemalloc = 1; \ 86 \ 87 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 88 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 89 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 90 a->maxnz += CHUNKSIZE; \ 91 a->reallocs++; \ 92 } \ 93 N = nrow++ - 1; a->nz++; \ 94 /* shift up all the later entries in this row */ \ 95 for ( ii=N; ii>=_i; ii-- ) { \ 96 rp[ii+1] = rp[ii]; \ 97 ap[ii+1] = ap[ii]; \ 98 } \ 99 rp[_i] = col1; \ 100 ap[_i] = value; \ 101 a_noinsert: ; \ 102 ailen[row] = nrow; \ 103 } 104 105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 106 { \ 107 \ 108 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 109 rmax = bimax[row]; nrow = bilen[row]; \ 110 col1 = col - shift; \ 111 \ 112 low = 0; high = nrow; \ 113 while (high-low > 5) { \ 114 t = (low+high)/2; \ 115 if (rp[t] > col) high = t; \ 116 else low = t; \ 117 } \ 118 for ( _i=0; _i<nrow; _i++ ) { \ 119 if (rp[_i] > col1) break; \ 
120 if (rp[_i] == col1) { \ 121 if (addv == ADD_VALUES) ap[_i] += value; \ 122 else ap[_i] = value; \ 123 goto b_noinsert; \ 124 } \ 125 } \ 126 if (nonew == 1) goto b_noinsert; \ 127 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 128 if (nrow >= rmax) { \ 129 /* there is no extra room in row, therefore enlarge */ \ 130 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 131 Scalar *new_a; \ 132 \ 133 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \ 134 \ 135 /* malloc new storage space */ \ 136 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 137 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 138 new_j = (int *) (new_a + new_nz); \ 139 new_i = new_j + new_nz; \ 140 \ 141 /* copy over old data into new slots */ \ 142 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 143 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 144 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 145 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 146 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 147 len*sizeof(int)); \ 148 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 149 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 150 len*sizeof(Scalar)); \ 151 /* free up old matrix storage */ \ 152 \ 153 PetscFree(b->a); \ 154 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 155 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 156 b->singlemalloc = 1; \ 157 \ 158 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 159 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 160 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 161 b->maxnz += CHUNKSIZE; \ 162 b->reallocs++; \ 163 } \ 164 N = nrow++ - 1; b->nz++; \ 165 /* shift up all the later entries in this row */ \ 166 for ( ii=N; ii>=_i; ii-- ) { \ 167 rp[ii+1] = rp[ii]; \ 168 ap[ii+1] = ap[ii]; \ 169 } \ 170 rp[_i] = col1; \ 171 ap[_i] 
= value; \ 172 b_noinsert: ; \ 173 bilen[row] = nrow; \ 174 } 175 176 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 177 #undef __FUNC__ 178 #define __FUNC__ "MatSetValues_MPIAIJ" 179 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 180 { 181 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 182 Scalar value; 183 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 184 int cstart = aij->cstart, cend = aij->cend,row,col; 185 int roworiented = aij->roworiented; 186 187 /* Some Variables required in the macro */ 188 Mat A = aij->A; 189 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 190 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 191 Scalar *aa = a->a; 192 193 Mat B = aij->B; 194 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 195 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 196 Scalar *ba = b->a; 197 198 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 199 int nonew = a->nonew,shift = a->indexshift; 200 Scalar *ap; 201 202 PetscFunctionBegin; 203 for ( i=0; i<m; i++ ) { 204 #if defined(USE_PETSC_BOPT_g) 205 if (im[i] < 0) SETERRQ(1,0,"Negative row"); 206 if (im[i] >= aij->M) SETERRQ(1,0,"Row too large"); 207 #endif 208 if (im[i] >= rstart && im[i] < rend) { 209 row = im[i] - rstart; 210 for ( j=0; j<n; j++ ) { 211 if (in[j] >= cstart && in[j] < cend){ 212 col = in[j] - cstart; 213 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 214 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 215 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 216 } 217 #if defined(USE_PETSC_BOPT_g) 218 else if (in[j] < 0) {SETERRQ(1,0,"Negative column");} 219 else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");} 220 #endif 221 else { 222 if (mat->was_assembled) { 223 if (!aij->colmap) { 224 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 225 } 226 col = aij->colmap[in[j]] - 1; 227 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 228 ierr = 
DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 229 col = in[j]; 230 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 231 B = aij->B; 232 b = (Mat_SeqAIJ *) B->data; 233 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 234 ba = b->a; 235 } 236 } 237 else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(1,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(1,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); 
CHKERRQ(ierr); 290 } 291 } 292 } 293 } 294 else { 295 SETERRQ(1,0,"Only local values currently supported"); 296 } 297 } 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 PetscFunctionBegin; 315 /* make sure all processors are either in INSERTMODE or ADDMODE */ 316 MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm); 317 if (addv == (ADD_VALUES|INSERT_VALUES)) { 318 SETERRQ(1,0,"Some processors inserted others added"); 319 } 320 mat->insertmode = addv; /* in case this processor had no cache */ 321 322 /* first count number of contributors to each processor */ 323 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 324 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 325 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 326 for ( i=0; i<aij->stash.n; i++ ) { 327 idx = aij->stash.idx[i]; 328 for ( j=0; j<size; j++ ) { 329 if (idx >= owners[j] && idx < owners[j+1]) { 330 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 331 } 332 } 333 } 334 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 335 336 /* inform other processors of number of messages and max length*/ 337 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 338 MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm); 339 nreceives = work[rank]; 340 MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm); 341 nmax = work[rank]; 342 PetscFree(work); 343 344 /* post receives: 345 1) each message will consist of ordered pairs 346 (global index,value) we store 
the global index as a double 347 to simplify the message passing. 348 2) since we don't know how long each individual message is we 349 allocate the largest needed buffer for each receive. Potentially 350 this is a lot of wasted space. 351 352 353 This could be done better. 354 */ 355 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar)); 356 CHKPTRQ(rvalues); 357 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request)); 358 CHKPTRQ(recv_waits); 359 for ( i=0; i<nreceives; i++ ) { 360 MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 361 comm,recv_waits+i); 362 } 363 364 /* do sends: 365 1) starts[i] gives the starting index in svalues for stuff going to 366 the ith processor 367 */ 368 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 369 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request)); 370 CHKPTRQ(send_waits); 371 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 372 starts[0] = 0; 373 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 374 for ( i=0; i<aij->stash.n; i++ ) { 375 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 376 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 377 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 378 } 379 PetscFree(owner); 380 starts[0] = 0; 381 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 382 count = 0; 383 for ( i=0; i<size; i++ ) { 384 if (procs[i]) { 385 MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag, 386 comm,send_waits+count++); 387 } 388 } 389 PetscFree(starts); PetscFree(nprocs); 390 391 /* Free cache space */ 392 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 393 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 394 395 aij->svalues = svalues; aij->rvalues = rvalues; 396 aij->nsends = nsends; aij->nrecvs = nreceives; 397 aij->send_waits = send_waits; 
aij->recv_waits = recv_waits; 398 aij->rmax = nmax; 399 400 PetscFunctionReturn(0); 401 } 402 extern int MatSetUpMultiply_MPIAIJ(Mat); 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 406 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 409 MPI_Status *send_status,recv_status; 410 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 411 int row,col,other_disassembled; 412 Scalar *values,val; 413 InsertMode addv = mat->insertmode; 414 415 PetscFunctionBegin; 416 /* wait on receives */ 417 while (count) { 418 MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status); 419 /* unpack receives into our local space */ 420 values = aij->rvalues + 3*imdex*aij->rmax; 421 MPI_Get_count(&recv_status,MPIU_SCALAR,&n); 422 n = n/3; 423 for ( i=0; i<n; i++ ) { 424 row = (int) PetscReal(values[3*i]) - aij->rstart; 425 col = (int) PetscReal(values[3*i+1]); 426 val = values[3*i+2]; 427 if (col >= aij->cstart && col < aij->cend) { 428 col -= aij->cstart; 429 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 430 } else { 431 if (mat->was_assembled || mat->assembled) { 432 if (!aij->colmap) { 433 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 434 } 435 col = aij->colmap[col] - 1; 436 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 437 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 438 col = (int) PetscReal(values[3*i+1]); 439 } 440 } 441 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 442 } 443 } 444 count--; 445 } 446 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 447 448 /* wait on sends */ 449 if (aij->nsends) { 450 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 451 MPI_Waitall(aij->nsends,aij->send_waits,send_status); 452 PetscFree(send_status); 453 } 454 PetscFree(aij->send_waits); PetscFree(aij->svalues); 455 456 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 457 ierr = 
MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 458 459 /* determine if any processor has disassembled, if so we must 460 also disassemble ourselfs, in order that we may reassemble. */ 461 MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm); 462 if (mat->was_assembled && !other_disassembled) { 463 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 466 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 467 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 468 } 469 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 470 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 471 472 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNC__ 477 #define __FUNC__ "MatZeroEntries_MPIAIJ" 478 int MatZeroEntries_MPIAIJ(Mat A) 479 { 480 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 481 int ierr; 482 483 PetscFunctionBegin; 484 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 485 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 /* the code does not do the diagonal entries correctly unless the 490 matrix is square and the column and row owerships are identical. 491 This is a BUG. The only way to fix it seems to be to access 492 aij->A and aij->B directly and not through the MatZeroRows() 493 routine. 
*/
#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   Zeros the rows listed in 'is' (global numbering).  Each row index is
   first routed to the processor that owns it; every processor then zeros
   its own local rows via MatZeroRows() on the sequential blocks, placing
   'diag' on the diagonal of the A block (the B block gets 0).
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];} 

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank]; 
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to 
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0; 
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  /* NOTE(review): return value of ISRestoreIndices() is not checked here */
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits); 
  
  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;  /* convert to local row number */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);
    
  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);  
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMult_MPIAIJ"
/*
   y = A*x: overlaps the scatter of off-processor x values with the
   diagonal-block multiply, then adds in the off-diagonal contribution.
*/
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
/* z = y + A*x, same overlap strategy as MatMult_MPIAIJ(). */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
/* y = A'*x: off-diagonal transpose result is scattered back (reverse, add). */
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* z = y + A'*x, same communication pattern as MatMultTrans_MPIAIJ(). */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the 
  diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
/* Scales the matrix by *aa, applied to both sequential blocks. */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIAIJ"
/* Frees the stash, both sequential blocks, and all scatter/work storage. */
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners); 
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij); 
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_Binary"
/* Binary viewing currently supported only in the uniprocessor case. */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
/*
   Handles ASCII-info, draw and general viewing.  In the general multi-
   processor case the whole matrix is assembled onto processor 0 and viewed
   there.
*/
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0); 
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
        CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part: column indices are temporarily shifted to
         global numbering, then restored afterwards */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      } 
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part: garray maps local B columns to global */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      } 
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ"
/* Dispatches to the binary or ASCII/draw/Matlab viewing routine. */
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;
 
  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/*
    MatRelax_MPIAIJ - SOR/relaxation sweeps for an MPIAIJ matrix.

    Only the "local" sweep variants are supported: the off-process part of
    x is gathered once into mat->lvec per call, so values owned by other
    processes are refreshed only between calls, not each inner iteration.

    matin  - parallel matrix (diagonal block A, off-diagonal block B)
    bb     - right-hand side
    omega  - relaxation factor
    flag   - SOR_LOCAL_{SYMMETRIC,FORWARD,BACKWARD}_SWEEP, optionally
             combined with SOR_ZERO_INITIAL_GUESS
    fshift - shift added to each diagonal entry before dividing
    its    - number of sweeps
    xx     - solution vector, updated in place
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  /* make sure the locations of the diagonal entries of A are cached */
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x starts at zero so off-process values contribute nothing;
         delegate entirely to the sequential relaxation on the diagonal block */
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    /* gather the off-process entries of x needed by the B (off-diagonal) block */
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        /* NOTE(review): SPARSEDENSEMDOT appears to subtract the row dot
           product (including the diagonal term) from sum — the diagonal
           contribution is added back in the update below; confirm against
           src/inline/spops.h */
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(1,0,"Option not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
/*
    MatGetInfo_MPIAIJ - Gathers matrix statistics (nonzeros, memory, mallocs)
    from the two sequential blocks and, for the global flavors, reduces them
    across the communicator.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  /* NOTE(review): columns_local is set to the GLOBAL column count (mat->N),
     not mat->n — consistent with MatGetLocalSize_MPIAIJ in this file, but
     confirm this is intentional */
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  /* accumulate the counts from the diagonal (A) and off-diagonal (B) blocks */
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
/*
    MatSetOption_MPIAIJ - Applies a matrix option: most options are simply
    forwarded to both sequential blocks; a few set flags on the parallel
    structure, are ignored, or are unsupported.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    /* options handled entirely by the sequential blocks */
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    /* suppress stashing of values destined for other processes */
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
/* MatGetSize_MPIAIJ - Returns the global dimensions; either pointer may be null. */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
/*
    MatGetLocalSize_MPIAIJ - Returns the local row count and the column count.
    NOTE(review): *n is set to the GLOBAL column count mat->N, not the local
    mat->n — confirm callers rely on this (MatDiagonalScale_MPIAIJ below
    overrides the column value with aij->n).
*/
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
/* MatGetOwnershipRange_MPIAIJ - Returns [rstart,rend), this process's row range. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
    MatGetRow_MPIAIJ - Returns one locally owned row with global column
    indices, merging the diagonal (A) and off-diagonal (B) blocks into a
    single column-sorted row.  Only one row may be "active" at a time; the
    work arrays are allocated lazily and sized for the longest local row.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices stored after them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  /* pass null pointers for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray; /* maps B's local column numbers to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes this process's column block */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* imark not yet computed (values were not requested) */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB);
CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
    MatNorm_MPIAIJ - Computes the Frobenius, 1- or infinity-norm of the
    parallel matrix.  The 1-norm path allocates two length-N work arrays
    (not scalable in the number of global columns); the 2-norm is not
    supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer to the sequential norm */
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over both blocks, then a global reduction */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* accumulate |a_ij| into the owning global column's slot */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
    MatTranspose_MPIAIJ - Builds the transpose by inserting each block's
    entries column-wise into a new parallel matrix.  A's column indices are
    temporarily shifted to global numbering and restored afterwards.  For the
    "in-place" case (matout == PETSC_NULL, square only) the old data
    structures are freed and the new header copied over the old one.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Square matrix only for in-place");
  }

  /* note the swapped local and global dimensions: (n,m) and (N,M) */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* shift A's column indices to global numbering for the insertion */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row `row` of this matrix as a column of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  /* restore A's column indices to local numbering */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  /* translate B's compressed column numbers to global via garray */
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
    MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr); either
    vector may be null.  Right scaling of the off-diagonal block needs the
    off-process entries of rr, which are gathered into aij->lvec.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n; /* use the true local column count, not what MatGetLocalSize returned */
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ option help, on process 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
/*
    MatEqual_MPIAIJ - Compares two MPIAIJ matrices block by block; the local
    results are combined with a logical-AND reduction so every process
    returns the same global answer.
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* only compare the off-diagonal blocks if the diagonal blocks agree */
  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);
  PetscFunctionReturn(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
/* virtual function table installed into every MPIAIJ matrix header;
   zero entries are operations this format does not support */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  /* on a single process simply build a sequential AIJ matrix */
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  /* per-row preallocation requires knowing the local row count up front */
  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  /* derive any PETSC_DECIDE dimensions from the ones that were given */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's local row count, then prefix-sum into offsets */
  MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* the diagonal block is m x n; the off-diagonal block is allocated with
     the full global column width (its columns are compressed on assembly) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
/*
    MatConvertSameType_MPIAIJ - Duplicates an assembled MPIAIJ matrix,
    copying the ownership tables, column map, garray, scatter context and
    both sequential blocks.
*/
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  /* copy the dimensions and ownership information */
  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners live in one allocation, cowners after rowners */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr =
OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
/*
    MatLoad_MPIAIJ - Loads a matrix from a PETSc binary viewer.  Process 0
    reads the header, row lengths, column indices and values, and ships each
    process its slice; rows are distributed in the same M/size + remainder
    pattern used by MatCreateMPIAIJ.  Row lengths are split into diagonal
    and off-diagonal counts so the matrix can be preallocated exactly.
*/
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ");
    }
  }


  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* one allocation: ourlens first, offlens stored after it */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  } else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily reduce ourlens to the diagonal-block counts for preallocation */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  /* restore ourlens to the full row lengths for the insertion loops below */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr =
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNC__ 1928 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1929 /* 1930 Not great since it makes two copies of the submatrix, first an SeqAIJ 1931 in local and then by concatenating the local matrices the end result. 1932 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1933 */ 1934 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat) 1935 { 1936 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1937 Mat *local,M; 1938 Scalar *vwork,*aa; 1939 MPI_Comm comm = mat->comm; 1940 Mat_SeqAIJ *aij; 1941 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1942 1943 PetscFunctionBegin; 1944 MPI_Comm_rank(comm,&rank); 1945 MPI_Comm_size(comm,&size); 1946 1947 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1948 1949 /* 1950 m - number of local rows 1951 n - number of columns (same on all processors) 1952 rstart - first row in new global matrix generated 1953 */ 1954 ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr); 1955 if (call == MAT_INITIAL_MATRIX) { 1956 aij = (Mat_SeqAIJ *) (*local)->data; 1957 if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix"); 1958 ii = aij->i; 1959 jj = aij->j; 1960 1961 /* 1962 Determine the number of non-zeros in the diagonal and off-diagonal 1963 portions of the matrix in order to do correct preallocation 1964 */ 1965 1966 /* first get start and end of "diagonal" columns */ 1967 nlocal = n/size + ((n % size) > rank); 1968 MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm); 1969 rstart = rend - nlocal; 1970 1971 /* next, compute all the lengths */ 1972 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 1973 olens = dlens + m; 1974 for ( i=0; i<m; i++ ) { 1975 jend = ii[i+1] - ii[i]; 1976 olen = 0; 1977 dlen = 0; 1978 for ( j=0; j<jend; j++ ) { 1979 if ( *jj < rstart || *jj >= rend) olen++; 1980 else dlen++; 1981 jj++; 1982 
} 1983 olens[i] = olen; 1984 dlens[i] = dlen; 1985 } 1986 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1987 PetscFree(dlens); 1988 } else { 1989 int ml,nl; 1990 1991 M = *newmat; 1992 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1993 if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request"); 1994 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1995 } 1996 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 1997 aij = (Mat_SeqAIJ *) (*local)->data; 1998 if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix"); 1999 ii = aij->i; 2000 jj = aij->j; 2001 aa = aij->a; 2002 for (i=0; i<m; i++) { 2003 row = rstart + i; 2004 nz = ii[i+1] - ii[i]; 2005 cwork = jj; jj += nz; 2006 vwork = aa; aa += nz; 2007 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2008 } 2009 2010 ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr); 2011 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2012 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2013 *newmat = M; 2014 PetscFunctionReturn(0); 2015 } 2016