1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.276 1999/02/03 03:17:05 curfman Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 #if defined (USE_CTABLE) 22 int ierr; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined (USE_CTABLE) 27 ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr); 28 for ( i=0; i<n; i++ ){ 29 ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr); 30 } 31 #else 32 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 33 PLogObjectMemory(mat,aij->N*sizeof(int)); 34 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 35 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 36 #endif 37 PetscFunctionReturn(0); 38 } 39 40 extern int DisAssemble_MPIAIJ(Mat); 41 42 #define CHUNKSIZE 15 43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 44 { \ 45 \ 46 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 47 rmax = aimax[row]; nrow = ailen[row]; \ 48 col1 = col - shift; \ 49 \ 50 low = 0; high = nrow; \ 51 while (high-low > 5) { \ 52 t = (low+high)/2; \ 53 if (rp[t] > col) high = t; \ 54 else low = t; \ 55 } \ 56 for ( _i=0; _i<nrow; _i++ ) { \ 57 if (rp[_i] > col1) break; \ 58 if (rp[_i] == col1) { \ 59 if (addv == ADD_VALUES) ap[_i] += value; \ 60 else ap[_i] = value; \ 61 goto a_noinsert; \ 62 } \ 63 } \ 64 if (nonew == 1) goto a_noinsert; \ 65 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 66 if (nrow >= rmax) { \ 67 /* there is no extra room in row, therefore enlarge */ \ 68 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 69 Scalar *new_a; \ 70 \ 71 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 72 \ 73 /* malloc new storage space */ \ 74 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 75 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 76 new_j = (int *) (new_a + new_nz); \ 77 new_i = new_j + new_nz; \ 78 \ 79 /* copy over old data into new slots */ \ 80 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 81 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 82 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 83 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 84 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 85 len*sizeof(int)); \ 86 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 87 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 88 len*sizeof(Scalar)); \ 89 /* free up old matrix storage */ \ 90 \ 91 PetscFree(a->a); \ 92 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 93 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 94 a->singlemalloc = 1; \ 95 \ 96 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 97 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 98 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 99 a->maxnz += CHUNKSIZE; \ 100 a->reallocs++; \ 101 } \ 102 N = nrow++ - 1; a->nz++; \ 103 /* shift up all the later entries in this row */ \ 104 for ( ii=N; ii>=_i; ii-- ) { \ 105 rp[ii+1] = rp[ii]; \ 106 ap[ii+1] = ap[ii]; \ 107 } \ 108 rp[_i] = col1; \ 109 ap[_i] = value; \ 110 a_noinsert: ; \ 111 ailen[row] = nrow; \ 112 } 113 114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 118 rmax = bimax[row]; nrow = bilen[row]; \ 119 col1 = col - shift; \ 120 \ 121 low = 0; high = nrow; \ 122 while (high-low > 5) { \ 123 t = (low+high)/2; \ 124 if (rp[t] > col) high = t; \ 125 else low = t; \ 126 } \ 127 for ( _i=0; _i<nrow; _i++ ) { \ 128 if (rp[_i] > col1) break; \ 129 if (rp[_i] == col1) { \ 130 if (addv == ADD_VALUES) ap[_i] += value; \ 131 else ap[_i] = value; \ 132 goto b_noinsert; \ 133 } \ 134 } \ 135 if (nonew == 1) goto b_noinsert; \ 136 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 143 \ 144 /* malloc new storage space */ \ 145 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 146 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 147 new_j = (int *) (new_a + new_nz); \ 148 new_i = new_j + new_nz; \ 149 \ 150 /* copy over old data into new slots */ \ 151 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 152 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 153 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 154 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 155 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 156 len*sizeof(int)); \ 157 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 158 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 159 len*sizeof(Scalar)); \ 160 /* free up old matrix storage */ \ 161 \ 162 PetscFree(b->a); \ 163 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = 1; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 169 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for ( ii=N; ii>=_i; ii-- ) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 186 #undef __FUNC__ 187 #define __FUNC__ "MatSetValues_MPIAIJ" 188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 191 Scalar value; 192 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 193 int cstart = aij->cstart, cend = aij->cend,row,col; 194 int roworiented = aij->roworiented; 195 196 /* Some Variables required in the macro */ 197 Mat A = aij->A; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 199 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 200 Scalar *aa = a->a; 201 202 Mat B = aij->B; 203 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 204 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 205 Scalar *ba = b->a; 206 207 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 208 int nonew = a->nonew,shift = a->indexshift; 209 Scalar *ap; 210 211 PetscFunctionBegin; 212 for ( i=0; i<m; i++ ) { 213 if (im[i] < 0) continue; 214 #if defined(USE_PETSC_BOPT_g) 215 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 216 #endif 217 if (im[i] >= rstart && im[i] < rend) { 218 row = im[i] - rstart; 219 for ( j=0; j<n; j++ ) { 220 if (in[j] >= cstart && in[j] < cend){ 221 col = in[j] - cstart; 222 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } 226 else if (in[j] < 0) continue; 227 #if defined(USE_PETSC_BOPT_g) 228 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 229 #endif 230 else { 231 if (mat->was_assembled) { 232 if (!aij->colmap) { 233 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 234 } 235 #if defined (USE_CTABLE) 236 col = TableFind( aij->colmap, in[j] + 1 ) - 1; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ *) B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !aij->donotstash) { 257 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!aij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNC__ 272 #define __FUNC__ "MatGetValues_MPIAIJ" 273 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 276 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 277 int cstart = aij->cstart, cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for ( i=0; i<m; i++ ) { 281 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 282 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for ( j=0; j<n; j++ ) { 286 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 287 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (USE_CTABLE) 296 col = TableFind( aij->colmap, idxn[j] + 1 ) - 1; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 318 MPI_Comm comm = mat->comm; 319 int size = aij->size, *owners = aij->rowners; 320 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 321 MPI_Request *send_waits,*recv_waits; 322 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 323 InsertMode addv; 324 Scalar *rvalues,*svalues; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 aij->svalues = 0; aij->rvalues = 0; 329 aij->nsends = 0; aij->nrecvs = 0; 330 aij->send_waits = 0; aij->recv_waits = 0; 331 aij->rmax = 0; 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 /* first count number of contributors to each processor */ 343 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 344 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 345 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 346 for ( i=0; i<aij->stash.n; i++ ) { 347 idx = aij->stash.idx[i]; 348 for ( j=0; j<size; j++ ) { 349 if (idx >= owners[j] && idx < owners[j+1]) { 350 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 351 } 352 } 353 } 354 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 355 356 /* inform other processors of number of messages and max length*/ 357 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 358 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 359 nreceives = work[rank]; 360 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 361 nmax = work[rank]; 362 PetscFree(work); 363 364 /* post receives: 365 1) each message will consist of ordered pairs 366 (global index,value) we store the global index as a double 367 to simplify the message passing. 368 2) since we don't know how long each individual message is we 369 allocate the largest needed buffer for each receive. Potentially 370 this is a lot of wasted space. 371 372 373 This could be done better. 374 */ 375 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 376 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 377 for ( i=0; i<nreceives; i++ ) { 378 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 379 comm,recv_waits+i);CHKERRQ(ierr); 380 } 381 382 /* do sends: 383 1) starts[i] gives the starting index in svalues for stuff going to 384 the ith processor 385 */ 386 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 387 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 388 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 389 starts[0] = 0; 390 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 391 for ( i=0; i<aij->stash.n; i++ ) { 392 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 393 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 394 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 395 } 396 PetscFree(owner); 397 starts[0] = 0; 398 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 399 count = 0; 400 for ( i=0; i<size; i++ ) { 401 if (procs[i]) { 402 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 403 } 404 } 405 PetscFree(starts); 406 PetscFree(nprocs); 407 408 /* Free cache space */ 409 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 410 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 411 412 aij->svalues = svalues; aij->rvalues = rvalues; 413 aij->nsends = nsends; aij->nrecvs = nreceives; 414 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 415 aij->rmax = nmax; 416 417 PetscFunctionReturn(0); 418 } 419 extern int MatSetUpMultiply_MPIAIJ(Mat); 420 421 #undef __FUNC__ 422 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 423 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 424 { 425 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 426 MPI_Status *send_status,recv_status; 427 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 428 int row,col,other_disassembled; 429 Scalar *values,val; 430 InsertMode addv = mat->insertmode; 431 432 PetscFunctionBegin; 433 434 /* wait on receives */ 435 while (count) { 436 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 437 /* unpack receives into our local space */ 438 values = aij->rvalues + 3*imdex*aij->rmax; 439 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 440 n = n/3; 441 for ( i=0; i<n; i++ ) { 442 row = (int) PetscReal(values[3*i]) - aij->rstart; 443 col = (int) PetscReal(values[3*i+1]); 444 val = values[3*i+2]; 445 if (col >= aij->cstart && col < aij->cend) { 446 col -= aij->cstart; 447 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 448 } else { 449 if (mat->was_assembled || mat->assembled) { 450 if (!aij->colmap) { 451 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 452 } 453 #if defined (USE_CTABLE) 454 col = TableFind( aij->colmap, col + 1 ) - 1; 455 #else 456 col = aij->colmap[col] - 1; 457 #endif 458 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 col = (int) PetscReal(values[3*i+1]); 461 } 462 } 463 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 464 } 465 } 466 count--; 467 } 468 if (aij->recv_waits) PetscFree(aij->recv_waits); 469 if (aij->rvalues) PetscFree(aij->rvalues); 470 471 /* wait on sends */ 472 if (aij->nsends) { 473 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 474 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 475 PetscFree(send_status); 476 } 477 if (aij->send_waits) PetscFree(aij->send_waits); 478 if (aij->svalues) PetscFree(aij->svalues); 479 480 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 481 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 482 483 /* determine if any processor has disassembled, if so we must 484 also disassemble ourselfs, in order that we may reassemble. */ 485 /* 486 if nonzero structure of submatrix B cannot change then we know that 487 no processor disassembled thus we can skip this stuff 488 */ 489 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 490 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 491 if (mat->was_assembled && !other_disassembled) { 492 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 493 } 494 } 495 496 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 497 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 498 } 499 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 500 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 501 502 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNC__ 507 #define __FUNC__ "MatZeroEntries_MPIAIJ" 508 int MatZeroEntries_MPIAIJ(Mat A) 509 { 510 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 511 int ierr; 512 513 PetscFunctionBegin; 514 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 515 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNC__ 520 #define __FUNC__ "MatZeroRows_MPIAIJ" 521 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 522 { 523 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 524 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 525 int *procs,*nprocs,j,found,idx,nsends,*work,row; 526 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 527 int *rvalues,tag = A->tag,count,base,slen,n,*source; 528 int *lens,imdex,*lrows,*values,rstart=l->rstart; 529 MPI_Comm comm = A->comm; 530 MPI_Request *send_waits,*recv_waits; 531 MPI_Status recv_status,*send_status; 532 IS istmp; 533 PetscTruth localdiag; 534 535 PetscFunctionBegin; 536 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 537 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 538 539 /* first count number of contributors to each processor */ 540 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 541 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 542 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 543 for ( i=0; i<N; i++ ) { 544 idx = rows[i]; 545 found = 0; 546 for ( j=0; j<size; j++ ) { 547 if (idx >= owners[j] && idx < owners[j+1]) { 548 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 549 } 550 } 551 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 552 } 553 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 554 555 /* inform other processors of number of messages and max length*/ 556 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 557 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 558 nrecvs = work[rank]; 559 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 560 nmax = work[rank]; 561 PetscFree(work); 562 563 /* post receives: */ 564 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 565 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 566 for ( i=0; i<nrecvs; i++ ) { 567 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 568 } 569 570 /* do sends: 571 1) starts[i] gives the starting index in svalues for stuff going to 572 the ith processor 573 */ 574 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 575 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 576 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 577 starts[0] = 0; 578 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 579 for ( i=0; i<N; i++ ) { 580 svalues[starts[owner[i]]++] = rows[i]; 581 } 582 ISRestoreIndices(is,&rows); 583 584 starts[0] = 0; 585 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 586 count = 0; 587 for ( i=0; i<size; i++ ) { 588 if (procs[i]) { 589 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 590 } 591 } 592 PetscFree(starts); 593 594 base = owners[rank]; 595 596 /* wait on receives */ 597 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 598 source = lens + nrecvs; 599 count = nrecvs; slen = 0; 600 while (count) { 601 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 602 /* unpack receives into our local space */ 603 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 604 source[imdex] = recv_status.MPI_SOURCE; 605 lens[imdex] = n; 606 slen += n; 607 count--; 608 } 609 PetscFree(recv_waits); 610 611 /* move the data into the send scatter */ 612 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 613 count = 0; 614 for ( i=0; i<nrecvs; i++ ) { 615 values = rvalues + i*nmax; 616 for ( j=0; j<lens[i]; j++ ) { 617 lrows[count++] = values[j] - base; 618 } 619 } 620 PetscFree(rvalues); PetscFree(lens); 621 PetscFree(owner); PetscFree(nprocs); 622 623 /* actually zap the local rows */ 624 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 625 PLogObjectParent(A,istmp); 626 627 /* 628 Zero the required rows. If the "diagonal block" of the matrix 629 is square and the user wishes to set the diagonal we use seperate 630 code so that MatSetValues() is not called for each diagonal allocating 631 new memory, thus calling lots of mallocs and slowing things down. 632 633 Contributed by: Mathew Knepley 634 */ 635 localdiag = PETSC_FALSE; 636 if (diag && (l->A->M == l->A->N)) { 637 localdiag = PETSC_TRUE; 638 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 639 } else { 640 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 641 } 642 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 643 ierr = ISDestroy(istmp); CHKERRQ(ierr); 644 645 if (diag && (localdiag == PETSC_FALSE)) { 646 for ( i = 0; i < slen; i++ ) { 647 row = lrows[i] + rstart; 648 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 649 } 650 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 651 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 652 } 653 654 if (diag) { 655 for ( i = 0; i < slen; i++ ) { 656 row = lrows[i] + rstart; 657 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 658 } 659 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 660 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 661 } 662 PetscFree(lrows); 663 664 /* wait on sends */ 665 if (nsends) { 666 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 667 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 668 PetscFree(send_status); 669 } 670 PetscFree(send_waits); PetscFree(svalues); 671 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNC__ 676 #define __FUNC__ "MatMult_MPIAIJ" 677 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 678 { 679 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 680 int ierr,nt; 681 682 PetscFunctionBegin; 683 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 684 if (nt != a->n) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 686 } 687 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 688 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 689 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 690 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNC__ 695 #define __FUNC__ "MatMultAdd_MPIAIJ" 696 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 697 { 698 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 699 int ierr; 700 701 PetscFunctionBegin; 702 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 703 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 704 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 705 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 709 #undef __FUNC__ 710 #define __FUNC__ "MatMultTrans_MPIAIJ" 711 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 712 { 713 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 714 int ierr; 715 716 PetscFunctionBegin; 717 /* do nondiagonal part */ 718 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 719 /* send it on its way */ 720 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 721 /* do local part */ 722 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 723 /* receive remote parts: note this assumes the values are not actually */ 724 /* inserted in yy until the next line, which is true for my implementation*/ 725 /* but is not perhaps always true. */ 726 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNC__ 731 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 732 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 733 { 734 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 735 int ierr; 736 737 PetscFunctionBegin; 738 /* do nondiagonal part */ 739 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 740 /* send it on its way */ 741 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 742 /* do local part */ 743 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 744 /* receive remote parts: note this assumes the values are not actually */ 745 /* inserted in yy until the next line, which is true for my implementation*/ 746 /* but is not perhaps always true. */ 747 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 /* 752 This only works correctly for square matrices where the subblock A->A is the 753 diagonal block 754 */ 755 #undef __FUNC__ 756 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 757 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 758 { 759 int ierr; 760 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 761 762 PetscFunctionBegin; 763 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 764 if (a->rstart != a->cstart || a->rend != a->cend) { 765 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 766 } 767 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 #undef __FUNC__ 772 #define __FUNC__ "MatScale_MPIAIJ" 773 int MatScale_MPIAIJ(Scalar *aa,Mat A) 774 { 775 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 776 int ierr; 777 778 PetscFunctionBegin; 779 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 780 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNC__ 785 #define __FUNC__ "MatDestroy_MPIAIJ" 786 int MatDestroy_MPIAIJ(Mat mat) 787 { 788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 789 int ierr; 790 791 PetscFunctionBegin; 792 if (--mat->refct > 0) PetscFunctionReturn(0); 793 794 if (mat->mapping) { 795 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 796 } 797 if (mat->bmapping) { 798 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 799 } 800 if (mat->rmap) { 801 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 802 } 803 if (mat->cmap) { 804 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 805 } 806 #if defined(USE_PETSC_LOG) 807 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 808 #endif 809 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 810 PetscFree(aij->rowners); 811 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 812 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 813 #if defined (USE_CTABLE) 814 if (aij->colmap) TableDelete(aij->colmap); 815 #else 816 if (aij->colmap) PetscFree(aij->colmap); 817 #endif 818 if (aij->garray) PetscFree(aij->garray); 819 if (aij->lvec) VecDestroy(aij->lvec); 820 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 821 if (aij->rowvalues) PetscFree(aij->rowvalues); 822 PetscFree(aij); 823 PLogObjectDestroy(mat); 824 PetscHeaderDestroy(mat); 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNC__ 829 #define __FUNC__ "MatView_MPIAIJ_Binary" 830 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 831 { 832 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 833 int ierr; 834 835 PetscFunctionBegin; 836 if (aij->size == 1) { 837 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 838 } 839 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 840 PetscFunctionReturn(0); 841 } 842 843 #undef __FUNC__ 844 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 845 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 846 { 847 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 848 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 849 int ierr, format,shift = C->indexshift,rank = aij->rank ; 850 int size = aij->size; 851 FILE *fd; 852 ViewerType vtype; 853 854 PetscFunctionBegin; 855 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 856 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 857 ierr = ViewerGetFormat(viewer,&format); 858 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 859 MatInfo info; 860 int flg; 861 MPI_Comm_rank(mat->comm,&rank); 862 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 863 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 864 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 865 PetscSequentialPhaseBegin(mat->comm,1); 866 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 867 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 868 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 869 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 870 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 871 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 872 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 873 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 874 fflush(fd); 875 PetscSequentialPhaseEnd(mat->comm,1); 876 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 877 PetscFunctionReturn(0); 878 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 879 PetscFunctionReturn(0); 880 } 881 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 882 Draw draw; 883 PetscTruth isnull; 884 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 885 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 886 } 887 888 if (size == 1) { 889 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 890 } else { 891 /* assemble the entire matrix onto first processor. */ 892 Mat A; 893 Mat_SeqAIJ *Aloc; 894 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 895 Scalar *a; 896 897 if (!rank) { 898 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 899 } else { 900 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 901 } 902 PLogObjectParent(mat,A); 903 904 /* copy over the A part */ 905 Aloc = (Mat_SeqAIJ*) aij->A->data; 906 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 907 row = aij->rstart; 908 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 909 for ( i=0; i<m; i++ ) { 910 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 911 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 912 } 913 aj = Aloc->j; 914 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 915 916 /* copy over the B part */ 917 Aloc = (Mat_SeqAIJ*) aij->B->data; 918 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 919 row = aij->rstart; 920 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 921 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 922 for ( i=0; i<m; i++ ) { 923 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 924 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 925 } 926 PetscFree(ct); 927 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 928 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 929 /* 930 Everyone has to call to draw the matrix since the graphics waits are 931 synchronized across all processors that share the Draw object 932 */ 933 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 934 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 935 } 936 ierr = MatDestroy(A); CHKERRQ(ierr); 937 } 938 PetscFunctionReturn(0); 939 } 940 941 #undef __FUNC__ 942 #define __FUNC__ "MatView_MPIAIJ" 943 int MatView_MPIAIJ(Mat mat,Viewer viewer) 944 { 945 int ierr; 946 ViewerType vtype; 947 948 PetscFunctionBegin; 949 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 950 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 951 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 952 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 953 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 954 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 955 } else { 956 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 957 } 958 PetscFunctionReturn(0); 959 } 960 961 /* 962 This has to provide several versions. 963 964 2) a) use only local smoothing updating outer values only once. 965 b) local smoothing updating outer values each inner iteration 966 3) color updating out values betwen colors. 967 */ 968 #undef __FUNC__ 969 #define __FUNC__ "MatRelax_MPIAIJ" 970 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 971 double fshift,int its,Vec xx) 972 { 973 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 974 Mat AA = mat->A, BB = mat->B; 975 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 976 Scalar *b,*x,*xs,*ls,d,*v,sum; 977 int ierr,*idx, *diag; 978 int n = mat->n, m = mat->m, i,shift = A->indexshift; 979 980 PetscFunctionBegin; 981 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 982 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 983 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 984 xs = x + shift; /* shift by one for index start of 1 */ 985 ls = ls + shift; 986 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 987 diag = A->diag; 988 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 989 if (flag & SOR_ZERO_INITIAL_GUESS) { 990 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 991 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 992 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 993 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 997 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 998 while (its--) { 999 /* go down through the rows */ 1000 for ( i=0; i<m; i++ ) { 1001 n = A->i[i+1] - A->i[i]; 1002 PLogFlops(4*n+3); 1003 idx = A->j + A->i[i] + shift; 1004 v = A->a + A->i[i] + shift; 1005 sum = b[i]; 1006 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1007 d = fshift + A->a[diag[i]+shift]; 1008 n = B->i[i+1] - B->i[i]; 1009 idx = B->j + B->i[i] + shift; 1010 v = B->a + B->i[i] + shift; 1011 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1012 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1013 } 1014 /* come up through the rows */ 1015 for ( i=m-1; i>-1; i-- ) { 1016 n = A->i[i+1] - A->i[i]; 1017 PLogFlops(4*n+3) 1018 idx = A->j + A->i[i] + shift; 1019 v = A->a + A->i[i] + shift; 1020 sum = b[i]; 1021 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1022 d = fshift + A->a[diag[i]+shift]; 1023 n = B->i[i+1] - B->i[i]; 1024 idx = B->j + B->i[i] + shift; 1025 v = B->a + B->i[i] + shift; 1026 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1027 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1028 } 1029 } 1030 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1031 if (flag & SOR_ZERO_INITIAL_GUESS) { 1032 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1033 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1034 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1035 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1039 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 while (its--) { 1041 for ( i=0; i<m; i++ ) { 1042 n = A->i[i+1] - A->i[i]; 1043 PLogFlops(4*n+3); 1044 idx = A->j + A->i[i] + shift; 1045 v = A->a + A->i[i] + shift; 1046 sum = b[i]; 1047 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1048 d = fshift + A->a[diag[i]+shift]; 1049 n = B->i[i+1] - B->i[i]; 1050 idx = B->j + B->i[i] + shift; 1051 v = B->a + B->i[i] + shift; 1052 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1053 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1054 } 1055 } 1056 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1057 if (flag & SOR_ZERO_INITIAL_GUESS) { 1058 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1059 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1060 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1061 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1065 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1066 while (its--) { 1067 for ( i=m-1; i>-1; i-- ) { 1068 n = A->i[i+1] - A->i[i]; 1069 PLogFlops(4*n+3); 1070 idx = A->j + A->i[i] + shift; 1071 v = A->a + A->i[i] + shift; 1072 sum = b[i]; 1073 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1074 d = fshift + A->a[diag[i]+shift]; 1075 n = B->i[i+1] - B->i[i]; 1076 idx = B->j + B->i[i] + shift; 1077 v = B->a + B->i[i] + shift; 1078 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1079 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1080 } 1081 } 1082 } else { 1083 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1084 } 1085 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1086 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1087 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNC__ 1092 #define __FUNC__ "MatGetInfo_MPIAIJ" 1093 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1094 { 1095 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1096 Mat A = mat->A, B = mat->B; 1097 int ierr; 1098 double isend[5], irecv[5]; 1099 1100 PetscFunctionBegin; 1101 info->block_size = 1.0; 1102 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1103 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1104 isend[3] = info->memory; isend[4] = info->mallocs; 1105 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1106 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1107 isend[3] += info->memory; isend[4] += info->mallocs; 1108 if (flag == MAT_LOCAL) { 1109 info->nz_used = isend[0]; 1110 info->nz_allocated = isend[1]; 1111 info->nz_unneeded = isend[2]; 1112 info->memory = isend[3]; 1113 info->mallocs = isend[4]; 1114 } else if (flag == MAT_GLOBAL_MAX) { 1115 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1116 info->nz_used = irecv[0]; 1117 info->nz_allocated = irecv[1]; 1118 info->nz_unneeded = irecv[2]; 1119 info->memory = irecv[3]; 1120 info->mallocs = irecv[4]; 1121 } else if (flag == MAT_GLOBAL_SUM) { 1122 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1123 info->nz_used = irecv[0]; 1124 info->nz_allocated = irecv[1]; 1125 info->nz_unneeded = irecv[2]; 1126 info->memory = irecv[3]; 1127 info->mallocs = irecv[4]; 1128 } 1129 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1130 info->fill_ratio_needed = 0; 1131 info->factor_mallocs = 0; 1132 info->rows_global = (double)mat->M; 1133 info->columns_global = (double)mat->N; 1134 info->rows_local = (double)mat->m; 1135 info->columns_local = (double)mat->N; 1136 1137 PetscFunctionReturn(0); 1138 } 1139 1140 #undef __FUNC__ 1141 #define __FUNC__ "MatSetOption_MPIAIJ" 1142 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1145 1146 PetscFunctionBegin; 1147 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1148 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1149 op == MAT_COLUMNS_UNSORTED || 1150 op == MAT_COLUMNS_SORTED || 1151 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1152 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1153 MatSetOption(a->A,op); 1154 MatSetOption(a->B,op); 1155 } else if (op == MAT_ROW_ORIENTED) { 1156 a->roworiented = 1; 1157 MatSetOption(a->A,op); 1158 MatSetOption(a->B,op); 1159 } else if (op == MAT_ROWS_SORTED || 1160 op == MAT_ROWS_UNSORTED || 1161 op == MAT_SYMMETRIC || 1162 op == MAT_STRUCTURALLY_SYMMETRIC || 1163 op == MAT_YES_NEW_DIAGONALS) 1164 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1165 else if (op == MAT_COLUMN_ORIENTED) { 1166 a->roworiented = 0; 1167 MatSetOption(a->A,op); 1168 MatSetOption(a->B,op); 1169 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1170 a->donotstash = 1; 1171 } else if (op == MAT_NO_NEW_DIAGONALS){ 1172 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1173 } else { 1174 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNC__ 1180 #define __FUNC__ "MatGetSize_MPIAIJ" 1181 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1182 { 1183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1184 1185 PetscFunctionBegin; 1186 if (m) *m = mat->M; 1187 if (n) *n = mat->N; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNC__ 1192 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1193 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1194 { 1195 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1196 1197 PetscFunctionBegin; 1198 if (m) *m = mat->m; 1199 if (n) *n = mat->n; 1200 PetscFunctionReturn(0); 1201 } 1202 1203 #undef __FUNC__ 1204 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1205 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1206 { 1207 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1208 1209 PetscFunctionBegin; 1210 *m = mat->rstart; *n = mat->rend; 1211 PetscFunctionReturn(0); 1212 } 1213 1214 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1215 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1216 1217 #undef __FUNC__ 1218 #define __FUNC__ "MatGetRow_MPIAIJ" 1219 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1220 { 1221 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1222 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1223 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1224 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1225 int *cmap, *idx_p; 1226 1227 PetscFunctionBegin; 1228 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1229 mat->getrowactive = PETSC_TRUE; 1230 1231 if (!mat->rowvalues && (idx || v)) { 1232 /* 1233 allocate enough space to hold information from the longest row. 1234 */ 1235 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1236 int max = 1,m = mat->m,tmp; 1237 for ( i=0; i<m; i++ ) { 1238 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1239 if (max < tmp) { max = tmp; } 1240 } 1241 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1242 mat->rowindices = (int *) (mat->rowvalues + max); 1243 } 1244 1245 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1246 lrow = row - rstart; 1247 1248 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1249 if (!v) {pvA = 0; pvB = 0;} 1250 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1251 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1252 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1253 nztot = nzA + nzB; 1254 1255 cmap = mat->garray; 1256 if (v || idx) { 1257 if (nztot) { 1258 /* Sort by increasing column numbers, assuming A and B already sorted */ 1259 int imark = -1; 1260 if (v) { 1261 *v = v_p = mat->rowvalues; 1262 for ( i=0; i<nzB; i++ ) { 1263 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1264 else break; 1265 } 1266 imark = i; 1267 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1268 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1269 } 1270 if (idx) { 1271 *idx = idx_p = mat->rowindices; 1272 if (imark > -1) { 1273 for ( i=0; i<imark; i++ ) { 1274 idx_p[i] = cmap[cworkB[i]]; 1275 } 1276 } else { 1277 for ( i=0; i<nzB; i++ ) { 1278 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1279 else break; 1280 } 1281 imark = i; 1282 } 1283 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1284 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1285 } 1286 } else { 1287 if (idx) *idx = 0; 1288 if (v) *v = 0; 1289 } 1290 } 1291 *nz = nztot; 1292 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1293 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNC__ 1298 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1299 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1300 { 1301 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1302 1303 PetscFunctionBegin; 1304 if (aij->getrowactive == PETSC_FALSE) { 1305 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1306 } 1307 aij->getrowactive = PETSC_FALSE; 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNC__ 1312 #define __FUNC__ "MatNorm_MPIAIJ" 1313 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1314 { 1315 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1316 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1317 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1318 double sum = 0.0; 1319 Scalar *v; 1320 1321 PetscFunctionBegin; 1322 if (aij->size == 1) { 1323 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1324 } else { 1325 if (type == NORM_FROBENIUS) { 1326 v = amat->a; 1327 for (i=0; i<amat->nz; i++ ) { 1328 #if defined(USE_PETSC_COMPLEX) 1329 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1330 #else 1331 sum += (*v)*(*v); v++; 1332 #endif 1333 } 1334 v = bmat->a; 1335 for (i=0; i<bmat->nz; i++ ) { 1336 #if defined(USE_PETSC_COMPLEX) 1337 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1338 #else 1339 sum += (*v)*(*v); v++; 1340 #endif 1341 } 1342 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1343 *norm = sqrt(*norm); 1344 } else if (type == NORM_1) { /* max column norm */ 1345 double *tmp, *tmp2; 1346 int *jj, *garray = aij->garray; 1347 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1348 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1349 PetscMemzero(tmp,aij->N*sizeof(double)); 1350 *norm = 0.0; 1351 v = amat->a; jj = amat->j; 1352 for ( j=0; j<amat->nz; j++ ) { 1353 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1354 } 1355 v = bmat->a; jj = bmat->j; 1356 for ( j=0; j<bmat->nz; j++ ) { 1357 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1358 } 1359 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1360 for ( j=0; j<aij->N; j++ ) { 1361 if (tmp2[j] > *norm) *norm = tmp2[j]; 1362 } 1363 PetscFree(tmp); PetscFree(tmp2); 1364 } else if (type == NORM_INFINITY) { /* max row norm */ 1365 double ntemp = 0.0; 1366 for ( j=0; j<amat->m; j++ ) { 1367 v = amat->a + amat->i[j] + shift; 1368 sum = 0.0; 1369 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1370 sum += PetscAbsScalar(*v); v++; 1371 } 1372 v = bmat->a + bmat->i[j] + shift; 1373 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1374 sum += PetscAbsScalar(*v); v++; 1375 } 1376 if (sum > ntemp) ntemp = sum; 1377 } 1378 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1379 } else { 1380 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1381 } 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNC__ 1387 #define __FUNC__ "MatTranspose_MPIAIJ" 1388 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1389 { 1390 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1391 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1392 int ierr,shift = Aloc->indexshift; 1393 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1394 Mat B; 1395 Scalar *array; 1396 1397 PetscFunctionBegin; 1398 if (matout == PETSC_NULL && M != N) { 1399 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1400 } 1401 1402 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1403 1404 /* copy over the A part */ 1405 Aloc = (Mat_SeqAIJ*) a->A->data; 1406 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1407 row = a->rstart; 1408 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1409 for ( i=0; i<m; i++ ) { 1410 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1411 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1412 } 1413 aj = Aloc->j; 1414 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1415 1416 /* copy over the B part */ 1417 Aloc = (Mat_SeqAIJ*) a->B->data; 1418 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1419 row = a->rstart; 1420 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1421 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1422 for ( i=0; i<m; i++ ) { 1423 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1424 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1425 } 1426 PetscFree(ct); 1427 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1429 if (matout != PETSC_NULL) { 1430 *matout = B; 1431 } else { 1432 PetscOps *Abops; 1433 struct _MatOps *Aops; 1434 1435 /* This isn't really an in-place transpose .... but free data structures from a */ 1436 PetscFree(a->rowners); 1437 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1438 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1439 #if defined (USE_CTABLE) 1440 if (a->colmap) TableDelete(a->colmap); 1441 #else 1442 if (a->colmap) PetscFree(a->colmap); 1443 #endif 1444 if (a->garray) PetscFree(a->garray); 1445 if (a->lvec) VecDestroy(a->lvec); 1446 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1447 PetscFree(a); 1448 1449 /* 1450 This is horrible, horrible code. We need to keep the 1451 A pointers for the bops and ops but copy everything 1452 else from C. 1453 */ 1454 Abops = A->bops; 1455 Aops = A->ops; 1456 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1457 A->bops = Abops; 1458 A->ops = Aops; 1459 PetscHeaderDestroy(B); 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNC__ 1465 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1466 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1467 { 1468 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1469 Mat a = aij->A, b = aij->B; 1470 int ierr,s1,s2,s3; 1471 1472 PetscFunctionBegin; 1473 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1474 if (rr) { 1475 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1476 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1477 /* Overlap communication with computation. */ 1478 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1479 } 1480 if (ll) { 1481 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1482 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1483 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1484 } 1485 /* scale the diagonal block */ 1486 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1487 1488 if (rr) { 1489 /* Do a scatter end and then right scale the off-diagonal block */ 1490 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1491 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1492 } 1493 1494 PetscFunctionReturn(0); 1495 } 1496 1497 1498 extern int MatPrintHelp_SeqAIJ(Mat); 1499 #undef __FUNC__ 1500 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1501 int MatPrintHelp_MPIAIJ(Mat A) 1502 { 1503 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1504 int ierr; 1505 1506 PetscFunctionBegin; 1507 if (!a->rank) { 1508 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNC__ 1514 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1515 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1516 { 1517 PetscFunctionBegin; 1518 *bs = 1; 1519 PetscFunctionReturn(0); 1520 } 1521 #undef __FUNC__ 1522 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1523 int MatSetUnfactored_MPIAIJ(Mat A) 1524 { 1525 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1526 int ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNC__ 1534 #define __FUNC__ "MatEqual_MPIAIJ" 1535 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1536 { 1537 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1538 Mat a, b, c, d; 1539 PetscTruth flg; 1540 int ierr; 1541 1542 PetscFunctionBegin; 1543 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1544 a = matA->A; b = matA->B; 1545 c = matB->A; d = matB->B; 1546 1547 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1548 if (flg == PETSC_TRUE) { 1549 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1550 } 1551 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNC__ 1556 #define __FUNC__ "MatCopy_MPIAIJ" 1557 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1558 { 1559 int ierr; 1560 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1561 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1562 1563 PetscFunctionBegin; 1564 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1565 /* because of the column compression in the off-processor part of the matrix a->B, 1566 the number of columns in a->B and b->B may be different, hence we cannot call 1567 the MatCopy() directly on the two parts. If need be, we can provide a more 1568 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1569 then copying the submatrices */ 1570 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1571 } else { 1572 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1573 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1579 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1580 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1581 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1582 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1583 1584 /* -------------------------------------------------------------------*/ 1585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1586 MatGetRow_MPIAIJ, 1587 MatRestoreRow_MPIAIJ, 1588 MatMult_MPIAIJ, 1589 MatMultAdd_MPIAIJ, 1590 MatMultTrans_MPIAIJ, 1591 MatMultTransAdd_MPIAIJ, 1592 0, 1593 0, 1594 0, 1595 0, 1596 0, 1597 0, 1598 MatRelax_MPIAIJ, 1599 MatTranspose_MPIAIJ, 1600 MatGetInfo_MPIAIJ, 1601 MatEqual_MPIAIJ, 1602 MatGetDiagonal_MPIAIJ, 1603 MatDiagonalScale_MPIAIJ, 1604 MatNorm_MPIAIJ, 1605 MatAssemblyBegin_MPIAIJ, 1606 MatAssemblyEnd_MPIAIJ, 1607 0, 1608 MatSetOption_MPIAIJ, 1609 MatZeroEntries_MPIAIJ, 1610 MatZeroRows_MPIAIJ, 1611 0, 1612 0, 1613 0, 1614 0, 1615 MatGetSize_MPIAIJ, 1616 MatGetLocalSize_MPIAIJ, 1617 MatGetOwnershipRange_MPIAIJ, 1618 0, 1619 0, 1620 0, 1621 0, 1622 MatDuplicate_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 0, 1627 0, 1628 MatGetSubMatrices_MPIAIJ, 1629 MatIncreaseOverlap_MPIAIJ, 1630 MatGetValues_MPIAIJ, 1631 MatCopy_MPIAIJ, 1632 MatPrintHelp_MPIAIJ, 1633 MatScale_MPIAIJ, 1634 0, 1635 0, 1636 0, 1637 MatGetBlockSize_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 MatFDColoringCreate_MPIAIJ, 1643 0, 1644 MatSetUnfactored_MPIAIJ, 1645 0, 1646 0, 1647 MatGetSubMatrix_MPIAIJ, 1648 0, 1649 0, 1650 MatGetMaps_Petsc}; 1651 1652 /* ----------------------------------------------------------------------------------------*/ 1653 1654 EXTERN_C_BEGIN 1655 #undef __FUNC__ 1656 #define __FUNC__ "MatStoreValues_MPIAIJ" 1657 int MatStoreValues_MPIAIJ(Mat mat) 1658 { 1659 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1660 int ierr; 1661 1662 PetscFunctionBegin; 1663 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1664 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1665 PetscFunctionReturn(0); 1666 } 1667 EXTERN_C_END 1668 1669 EXTERN_C_BEGIN 1670 #undef __FUNC__ 1671 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1672 int MatRetrieveValues_MPIAIJ(Mat mat) 1673 { 1674 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1675 int ierr; 1676 1677 PetscFunctionBegin; 1678 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1679 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1680 PetscFunctionReturn(0); 1681 } 1682 EXTERN_C_END 1683 1684 #include "pc.h" 1685 EXTERN_C_BEGIN 1686 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1687 EXTERN_C_END 1688 1689 #undef __FUNC__ 1690 #define __FUNC__ "MatCreateMPIAIJ" 1691 /*@C 1692 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1693 (the default parallel PETSc format). For good matrix assembly performance 1694 the user should preallocate the matrix storage by setting the parameters 1695 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1696 performance can be increased by more than a factor of 50. 1697 1698 Collective on MPI_Comm 1699 1700 Input Parameters: 1701 + comm - MPI communicator 1702 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1703 This value should be the same as the local size used in creating the 1704 y vector for the matrix-vector product y = Ax. 1705 . n - This value should be the same as the local size used in creating the 1706 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1707 calculated if N is given) For square matrices n is almost always m. 1708 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1709 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1710 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1711 (same value is used for all local rows) 1712 . d_nnz - array containing the number of nonzeros in the various rows of the 1713 DIAGONAL portion of the local submatrix (possibly different for each row) 1714 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1715 The size of this array is equal to the number of local rows, i.e 'm'. 1716 You must leave room for the diagonal entry even if it is zero. 1717 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1718 submatrix (same value is used for all local rows). 1719 - o_nnz - array containing the number of nonzeros in the various rows of the 1720 OFF-DIAGONAL portion of the local submatrix (possibly different for 1721 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1722 structure. The size of this array is equal to the number 1723 of local rows, i.e 'm'. 1724 1725 Output Parameter: 1726 . A - the matrix 1727 1728 Notes: 1729 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1730 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1731 storage requirements for this matrix. 1732 1733 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1734 processor than it must be used on all processors that share the object for 1735 that argument. 1736 1737 The AIJ format (also called the Yale sparse matrix format or 1738 compressed row storage), is fully compatible with standard Fortran 77 1739 storage. That is, the stored row and column indices can begin at 1740 either one (as in Fortran) or zero. See the users manual for details. 1741 1742 The user MUST specify either the local or global matrix dimensions 1743 (possibly both). 1744 1745 The parallel matrix is partitioned such that the first m0 rows belong to 1746 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1747 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1748 1749 The DIAGONAL portion of the local submatrix of a processor can be defined 1750 as the submatrix which is obtained by extraction the part corresponding 1751 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1752 first row that belongs to the processor, and r2 is the last row belonging 1753 to the this processor. This is a square mxm matrix. The remaining portion 1754 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1755 1756 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1757 1758 By default, this format uses inodes (identical nodes) when possible. 1759 We search for consecutive rows with the same nonzero structure, thereby 1760 reusing matrix information to achieve increased efficiency. 1761 1762 Options Database Keys: 1763 + -mat_aij_no_inode - Do not use inodes 1764 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1765 - -mat_aij_oneindex - Internally use indexing starting at 1 1766 rather than 0. Note that when calling MatSetValues(), 1767 the user still MUST index entries starting at 0! 1768 . -mat_mpi - use the parallel matrix data structures even on one processor 1769 (defaults to using SeqBAIJ format on one processor) 1770 . -mat_mpi - use the parallel matrix data structures even on one processor 1771 (defaults to using SeqAIJ format on one processor) 1772 1773 1774 Example usage: 1775 1776 Consider the following 8x8 matrix with 34 non-zero values, that is 1777 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1778 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1779 as follows: 1780 1781 .vb 1782 1 2 0 | 0 3 0 | 0 4 1783 Proc0 0 5 6 | 7 0 0 | 8 0 1784 9 0 10 | 11 0 0 | 12 0 1785 ------------------------------------- 1786 13 0 14 | 15 16 17 | 0 0 1787 Proc1 0 18 0 | 19 20 21 | 0 0 1788 0 0 0 | 22 23 0 | 24 0 1789 ------------------------------------- 1790 Proc2 25 26 27 | 0 0 28 | 29 0 1791 30 0 0 | 31 32 33 | 0 34 1792 .ve 1793 1794 This can be represented as a collection of submatrices as: 1795 1796 .vb 1797 A B C 1798 D E F 1799 G H I 1800 .ve 1801 1802 Where the submatrices A,B,C are owned by proc0, D,E,F are 1803 owned by proc1, G,H,I are owned by proc2. 1804 1805 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1806 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1807 The 'M','N' parameters are 8,8, and have the same values on all procs. 1808 1809 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1810 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1811 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1812 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1813 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1814 matrix, ans [DF] as another SeqAIJ matrix. 1815 1816 When d_nz, o_nz parameters are specified, d_nz storage elements are 1817 allocated for every row of the local diagonal submatrix, and o_nz 1818 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1819 One way to choose d_nz and o_nz is to use the max nonzerors per local 1820 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1821 In this case, the values of d_nz,o_nz are: 1822 .vb 1823 proc0 : dnz = 2, o_nz = 2 1824 proc1 : dnz = 3, o_nz = 2 1825 proc2 : dnz = 1, o_nz = 4 1826 .ve 1827 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1828 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1829 for proc3. i.e we are using 12+15+10=37 storage locations to store 1830 34 values. 1831 1832 When d_nnz, o_nnz parameters are specified, the storage is specified 1833 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1834 In the above case the values for d_nnz,o_nnz are: 1835 .vb 1836 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1837 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1838 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1839 .ve 1840 Here the space allocated is sum of all the above values i.e 34, and 1841 hence pre-allocation is perfect. 1842 1843 Level: intermediate 1844 1845 .keywords: matrix, aij, compressed row, sparse, parallel 1846 1847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1848 @*/ 1849 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1850 { 1851 Mat B; 1852 Mat_MPIAIJ *b; 1853 int ierr, i,size,flag1 = 0, flag2 = 0; 1854 1855 PetscFunctionBegin; 1856 MPI_Comm_size(comm,&size); 1857 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1858 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1859 if (!flag1 && !flag2 && size == 1) { 1860 if (M == PETSC_DECIDE) M = m; 1861 if (N == PETSC_DECIDE) N = n; 1862 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 *A = 0; 1867 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1868 PLogObjectCreate(B); 1869 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1870 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1871 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1872 B->ops->destroy = MatDestroy_MPIAIJ; 1873 B->ops->view = MatView_MPIAIJ; 1874 B->factor = 0; 1875 B->assembled = PETSC_FALSE; 1876 B->mapping = 0; 1877 1878 B->insertmode = NOT_SET_VALUES; 1879 b->size = size; 1880 MPI_Comm_rank(comm,&b->rank); 1881 1882 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1883 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1884 } 1885 1886 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1887 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1888 b->m = m; B->m = m; 1889 b->n = n; B->n = n; 1890 b->N = N; B->N = N; 1891 b->M = M; B->M = M; 1892 1893 /* the information in the maps duplicates the information computed below, eventually 1894 we should remove the duplicate information that is not contained in the maps */ 1895 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1896 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1897 1898 /* build local table of row and column ownerships */ 1899 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1900 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1901 b->cowners = b->rowners + b->size + 2; 1902 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1903 b->rowners[0] = 0; 1904 for ( i=2; i<=b->size; i++ ) { 1905 b->rowners[i] += b->rowners[i-1]; 1906 } 1907 b->rstart = b->rowners[b->rank]; 1908 b->rend = b->rowners[b->rank+1]; 1909 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1910 b->cowners[0] = 0; 1911 for ( i=2; i<=b->size; i++ ) { 1912 b->cowners[i] += b->cowners[i-1]; 1913 } 1914 b->cstart = b->cowners[b->rank]; 1915 b->cend = b->cowners[b->rank+1]; 1916 1917 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1918 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1919 PLogObjectParent(B,b->A); 1920 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1921 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1922 PLogObjectParent(B,b->B); 1923 1924 /* build cache for off array entries formed */ 1925 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1926 b->donotstash = 0; 1927 b->colmap = 0; 1928 b->garray = 0; 1929 b->roworiented = 1; 1930 1931 /* stuff used for matrix vector multiply */ 1932 b->lvec = 0; 1933 b->Mvctx = 0; 1934 1935 /* stuff for MatGetRow() */ 1936 b->rowindices = 0; 1937 b->rowvalues = 0; 1938 b->getrowactive = PETSC_FALSE; 1939 1940 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1941 "MatStoreValues_MPIAIJ", 1942 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1944 "MatRetrieveValues_MPIAIJ", 1945 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1947 "MatGetDiagonalBlock_MPIAIJ", 1948 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1949 *A = B; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNC__ 1954 #define __FUNC__ "MatDuplicate_MPIAIJ" 1955 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1956 { 1957 Mat mat; 1958 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1959 int ierr, len=0, flg; 1960 1961 PetscFunctionBegin; 1962 *newmat = 0; 1963 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1964 PLogObjectCreate(mat); 1965 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1966 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1967 mat->ops->destroy = MatDestroy_MPIAIJ; 1968 mat->ops->view = MatView_MPIAIJ; 1969 mat->factor = matin->factor; 1970 mat->assembled = PETSC_TRUE; 1971 1972 a->m = mat->m = oldmat->m; 1973 a->n = mat->n = oldmat->n; 1974 a->M = mat->M = oldmat->M; 1975 a->N = mat->N = oldmat->N; 1976 1977 a->rstart = oldmat->rstart; 1978 a->rend = oldmat->rend; 1979 a->cstart = oldmat->cstart; 1980 a->cend = oldmat->cend; 1981 a->size = oldmat->size; 1982 a->rank = oldmat->rank; 1983 mat->insertmode = NOT_SET_VALUES; 1984 a->rowvalues = 0; 1985 a->getrowactive = PETSC_FALSE; 1986 1987 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1988 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1989 a->cowners = a->rowners + a->size + 2; 1990 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1991 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1992 if (oldmat->colmap) { 1993 #if defined (USE_CTABLE) 1994 ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr); 1995 #else 1996 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1997 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1998 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1999 #endif 2000 } else a->colmap = 0; 2001 if (oldmat->garray) { 2002 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 2003 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 2004 PLogObjectMemory(mat,len*sizeof(int)); 2005 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2006 } else a->garray = 0; 2007 2008 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2009 PLogObjectParent(mat,a->lvec); 2010 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2011 PLogObjectParent(mat,a->Mvctx); 2012 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2013 PLogObjectParent(mat,a->A); 2014 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2015 PLogObjectParent(mat,a->B); 2016 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2017 if (flg) { 2018 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2019 } 2020 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2021 *newmat = mat; 2022 PetscFunctionReturn(0); 2023 } 2024 2025 #include "sys.h" 2026 2027 #undef __FUNC__ 2028 #define __FUNC__ "MatLoad_MPIAIJ" 2029 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2030 { 2031 Mat A; 2032 Scalar *vals,*svals; 2033 MPI_Comm comm = ((PetscObject)viewer)->comm; 2034 MPI_Status status; 2035 int i, nz, ierr, j,rstart, rend, fd; 2036 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2037 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2038 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2039 2040 PetscFunctionBegin; 2041 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2042 if (!rank) { 2043 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2045 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2046 if (header[3] < 0) { 2047 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2048 } 2049 } 2050 2051 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2052 M = header[1]; N = header[2]; 2053 /* determine ownership of all rows */ 2054 m = M/size + ((M % size) > rank); 2055 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2056 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2057 rowners[0] = 0; 2058 for ( i=2; i<=size; i++ ) { 2059 rowners[i] += rowners[i-1]; 2060 } 2061 rstart = rowners[rank]; 2062 rend = rowners[rank+1]; 2063 2064 /* distribute row lengths to all processors */ 2065 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2066 offlens = ourlens + (rend-rstart); 2067 if (!rank) { 2068 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2069 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2070 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2071 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2072 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2073 PetscFree(sndcounts); 2074 } else { 2075 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2076 } 2077 2078 if (!rank) { 2079 /* calculate the number of nonzeros on each processor */ 2080 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2081 PetscMemzero(procsnz,size*sizeof(int)); 2082 for ( i=0; i<size; i++ ) { 2083 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2084 procsnz[i] += rowlengths[j]; 2085 } 2086 } 2087 PetscFree(rowlengths); 2088 2089 /* determine max buffer needed and allocate it */ 2090 maxnz = 0; 2091 for ( i=0; i<size; i++ ) { 2092 maxnz = PetscMax(maxnz,procsnz[i]); 2093 } 2094 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2095 2096 /* read in my part of the matrix column indices */ 2097 nz = procsnz[0]; 2098 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2099 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2100 2101 /* read in every one elses and ship off */ 2102 for ( i=1; i<size; i++ ) { 2103 nz = procsnz[i]; 2104 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2105 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2106 } 2107 PetscFree(cols); 2108 } else { 2109 /* determine buffer space needed for message */ 2110 nz = 0; 2111 for ( i=0; i<m; i++ ) { 2112 nz += ourlens[i]; 2113 } 2114 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2115 2116 /* receive message of column indices*/ 2117 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2118 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2119 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2120 } 2121 2122 /* determine column ownership if matrix is not square */ 2123 if (N != M) { 2124 n = N/size + ((N % size) > rank); 2125 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2126 cstart = cend - n; 2127 } else { 2128 cstart = rstart; 2129 cend = rend; 2130 n = cend - cstart; 2131 } 2132 2133 /* loop over local rows, determining number of off diagonal entries */ 2134 PetscMemzero(offlens,m*sizeof(int)); 2135 jj = 0; 2136 for ( i=0; i<m; i++ ) { 2137 for ( j=0; j<ourlens[i]; j++ ) { 2138 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2139 jj++; 2140 } 2141 } 2142 2143 /* create our matrix */ 2144 for ( i=0; i<m; i++ ) { 2145 ourlens[i] -= offlens[i]; 2146 } 2147 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2148 A = *newmat; 2149 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2150 for ( i=0; i<m; i++ ) { 2151 ourlens[i] += offlens[i]; 2152 } 2153 2154 if (!rank) { 2155 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2156 2157 /* read in my part of the matrix numerical values */ 2158 nz = procsnz[0]; 2159 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2160 2161 /* insert into matrix */ 2162 jj = rstart; 2163 smycols = mycols; 2164 svals = vals; 2165 for ( i=0; i<m; i++ ) { 2166 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2167 smycols += ourlens[i]; 2168 svals += ourlens[i]; 2169 jj++; 2170 } 2171 2172 /* read in other processors and ship out */ 2173 for ( i=1; i<size; i++ ) { 2174 nz = procsnz[i]; 2175 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2176 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2177 } 2178 PetscFree(procsnz); 2179 } else { 2180 /* receive numeric values */ 2181 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2182 2183 /* receive message of values*/ 2184 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2185 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2186 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2187 2188 /* insert into matrix */ 2189 jj = rstart; 2190 smycols = mycols; 2191 svals = vals; 2192 for ( i=0; i<m; i++ ) { 2193 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2194 smycols += ourlens[i]; 2195 svals += ourlens[i]; 2196 jj++; 2197 } 2198 } 2199 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2200 2201 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2202 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNC__ 2207 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2208 /* 2209 Not great since it makes two copies of the submatrix, first an SeqAIJ 2210 in local and then by concatenating the local matrices the end result. 2211 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2212 */ 2213 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2214 { 2215 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2216 Mat *local,M, Mreuse; 2217 Scalar *vwork,*aa; 2218 MPI_Comm comm = mat->comm; 2219 Mat_SeqAIJ *aij; 2220 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2221 2222 PetscFunctionBegin; 2223 MPI_Comm_rank(comm,&rank); 2224 MPI_Comm_size(comm,&size); 2225 2226 if (call == MAT_REUSE_MATRIX) { 2227 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2228 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2229 local = &Mreuse; 2230 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2231 } else { 2232 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2233 Mreuse = *local; 2234 PetscFree(local); 2235 } 2236 2237 /* 2238 m - number of local rows 2239 n - number of columns (same on all processors) 2240 rstart - first row in new global matrix generated 2241 */ 2242 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2243 if (call == MAT_INITIAL_MATRIX) { 2244 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2245 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2246 ii = aij->i; 2247 jj = aij->j; 2248 2249 /* 2250 Determine the number of non-zeros in the diagonal and off-diagonal 2251 portions of the matrix in order to do correct preallocation 2252 */ 2253 2254 /* first get start and end of "diagonal" columns */ 2255 if (csize == PETSC_DECIDE) { 2256 nlocal = n/size + ((n % size) > rank); 2257 } else { 2258 nlocal = csize; 2259 } 2260 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2261 rstart = rend - nlocal; 2262 if (rank == size - 1 && rend != n) { 2263 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2264 } 2265 2266 /* next, compute all the lengths */ 2267 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2268 olens = dlens + m; 2269 for ( i=0; i<m; i++ ) { 2270 jend = ii[i+1] - ii[i]; 2271 olen = 0; 2272 dlen = 0; 2273 for ( j=0; j<jend; j++ ) { 2274 if ( *jj < rstart || *jj >= rend) olen++; 2275 else dlen++; 2276 jj++; 2277 } 2278 olens[i] = olen; 2279 dlens[i] = dlen; 2280 } 2281 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2282 PetscFree(dlens); 2283 } else { 2284 int ml,nl; 2285 2286 M = *newmat; 2287 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2288 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2289 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2290 /* 2291 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2292 rather than the slower MatSetValues(). 2293 */ 2294 M->was_assembled = PETSC_TRUE; 2295 M->assembled = PETSC_FALSE; 2296 } 2297 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2298 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2299 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2300 ii = aij->i; 2301 jj = aij->j; 2302 aa = aij->a; 2303 for (i=0; i<m; i++) { 2304 row = rstart + i; 2305 nz = ii[i+1] - ii[i]; 2306 cwork = jj; jj += nz; 2307 vwork = aa; aa += nz; 2308 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2309 } 2310 2311 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2312 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2313 *newmat = M; 2314 2315 /* save submatrix used in processor for next request */ 2316 if (call == MAT_INITIAL_MATRIX) { 2317 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2318 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2319 } 2320 2321 PetscFunctionReturn(0); 2322 } 2323