1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.273 1999/01/11 16:21:28 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 #if defined (USE_CTABLE) 22 int ierr; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined (USE_CTABLE) 27 ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr); 28 for ( i=0; i<n; i++ ){ 29 ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr); 30 } 31 #else 32 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 33 PLogObjectMemory(mat,aij->N*sizeof(int)); 34 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 35 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 36 #endif 37 PetscFunctionReturn(0); 38 } 39 40 extern int DisAssemble_MPIAIJ(Mat); 41 42 #define CHUNKSIZE 15 43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 44 { \ 45 \ 46 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 47 rmax = aimax[row]; nrow = ailen[row]; \ 48 col1 = col - shift; \ 49 \ 50 low = 0; high = nrow; \ 51 while (high-low > 5) { \ 52 t = (low+high)/2; \ 53 if (rp[t] > col) high = t; \ 54 else low = t; \ 55 } \ 56 for ( _i=0; _i<nrow; _i++ ) { \ 57 if (rp[_i] > col1) break; \ 58 if (rp[_i] == col1) { \ 59 if (addv == ADD_VALUES) ap[_i] += value; \ 60 else ap[_i] = value; \ 61 goto a_noinsert; \ 62 } \ 63 } \ 64 if (nonew == 1) goto a_noinsert; \ 65 else if (nonew == -1) 
SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 66 if (nrow >= rmax) { \ 67 /* there is no extra room in row, therefore enlarge */ \ 68 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 69 Scalar *new_a; \ 70 \ 71 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 72 \ 73 /* malloc new storage space */ \ 74 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 75 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 76 new_j = (int *) (new_a + new_nz); \ 77 new_i = new_j + new_nz; \ 78 \ 79 /* copy over old data into new slots */ \ 80 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 81 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 82 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 83 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 84 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 85 len*sizeof(int)); \ 86 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 87 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 88 len*sizeof(Scalar)); \ 89 /* free up old matrix storage */ \ 90 \ 91 PetscFree(a->a); \ 92 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 93 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 94 a->singlemalloc = 1; \ 95 \ 96 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 97 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 98 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 99 a->maxnz += CHUNKSIZE; \ 100 a->reallocs++; \ 101 } \ 102 N = nrow++ - 1; a->nz++; \ 103 /* shift up all the later entries in this row */ \ 104 for ( ii=N; ii>=_i; ii-- ) { \ 105 rp[ii+1] = rp[ii]; \ 106 ap[ii+1] = ap[ii]; \ 107 } \ 108 rp[_i] = col1; \ 109 ap[_i] = value; \ 110 a_noinsert: ; \ 111 ailen[row] = nrow; \ 112 } 113 114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 rp = bj + bi[row] + shift; ap = ba + bi[row] + 
shift; \ 118 rmax = bimax[row]; nrow = bilen[row]; \ 119 col1 = col - shift; \ 120 \ 121 low = 0; high = nrow; \ 122 while (high-low > 5) { \ 123 t = (low+high)/2; \ 124 if (rp[t] > col) high = t; \ 125 else low = t; \ 126 } \ 127 for ( _i=0; _i<nrow; _i++ ) { \ 128 if (rp[_i] > col1) break; \ 129 if (rp[_i] == col1) { \ 130 if (addv == ADD_VALUES) ap[_i] += value; \ 131 else ap[_i] = value; \ 132 goto b_noinsert; \ 133 } \ 134 } \ 135 if (nonew == 1) goto b_noinsert; \ 136 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 143 \ 144 /* malloc new storage space */ \ 145 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 146 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 147 new_j = (int *) (new_a + new_nz); \ 148 new_i = new_j + new_nz; \ 149 \ 150 /* copy over old data into new slots */ \ 151 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 152 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 153 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 154 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 155 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 156 len*sizeof(int)); \ 157 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 158 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 159 len*sizeof(Scalar)); \ 160 /* free up old matrix storage */ \ 161 \ 162 PetscFree(b->a); \ 163 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = 1; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; 
\ 169 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for ( ii=N; ii>=_i; ii-- ) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 186 #undef __FUNC__ 187 #define __FUNC__ "MatSetValues_MPIAIJ" 188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 191 Scalar value; 192 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 193 int cstart = aij->cstart, cend = aij->cend,row,col; 194 int roworiented = aij->roworiented; 195 196 /* Some Variables required in the macro */ 197 Mat A = aij->A; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 199 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 200 Scalar *aa = a->a; 201 202 Mat B = aij->B; 203 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 204 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 205 Scalar *ba = b->a; 206 207 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 208 int nonew = a->nonew,shift = a->indexshift; 209 Scalar *ap; 210 211 PetscFunctionBegin; 212 for ( i=0; i<m; i++ ) { 213 #if defined(USE_PETSC_BOPT_g) 214 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 215 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 216 #endif 217 if (im[i] >= rstart && im[i] < rend) { 218 row = im[i] - rstart; 219 for ( j=0; j<n; j++ ) { 220 if (in[j] >= cstart && in[j] < cend){ 221 col = in[j] - cstart; 222 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } 226 #if 
defined(USE_PETSC_BOPT_g) 227 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 228 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 229 #endif 230 else { 231 if (mat->was_assembled) { 232 if (!aij->colmap) { 233 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 234 } 235 #if defined (USE_CTABLE) 236 col = TableFind( aij->colmap, in[j] + 1 ) - 1; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ *) B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } 256 else { 257 if (roworiented && !aij->donotstash) { 258 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 259 } 260 else { 261 if (!aij->donotstash) { 262 row = im[i]; 263 for ( j=0; j<n; j++ ) { 264 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNC__ 274 #define __FUNC__ "MatGetValues_MPIAIJ" 275 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 278 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 279 int cstart = aij->cstart, cend = aij->cend,row,col; 280 281 PetscFunctionBegin; 282 for ( i=0; i<m; i++ ) { 283 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 284 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 285 if (idxm[i] >= rstart && idxm[i] < rend) { 286 
row = idxm[i] - rstart; 287 for ( j=0; j<n; j++ ) { 288 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 289 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 290 if (idxn[j] >= cstart && idxn[j] < cend){ 291 col = idxn[j] - cstart; 292 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 293 } else { 294 if (!aij->colmap) { 295 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 296 } 297 #if defined (USE_CTABLE) 298 col = TableFind( aij->colmap, idxn[j] + 1 ) - 1; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNC__ 316 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 317 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 320 MPI_Comm comm = mat->comm; 321 int size = aij->size, *owners = aij->rowners; 322 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 323 MPI_Request *send_waits,*recv_waits; 324 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 325 InsertMode addv; 326 Scalar *rvalues,*svalues; 327 328 PetscFunctionBegin; 329 if (aij->donotstash) { 330 aij->svalues = 0; aij->rvalues = 0; 331 aij->nsends = 0; aij->nrecvs = 0; 332 aij->send_waits = 0; aij->recv_waits = 0; 333 aij->rmax = 0; 334 PetscFunctionReturn(0); 335 } 336 337 /* make sure all processors are either in INSERTMODE or ADDMODE */ 338 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 339 if (addv == (ADD_VALUES|INSERT_VALUES)) { 340 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 341 } 342 mat->insertmode = addv; /* in case this processor had no cache */ 343 344 /* first 
count number of contributors to each processor */ 345 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 346 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 347 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 348 for ( i=0; i<aij->stash.n; i++ ) { 349 idx = aij->stash.idx[i]; 350 for ( j=0; j<size; j++ ) { 351 if (idx >= owners[j] && idx < owners[j+1]) { 352 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 353 } 354 } 355 } 356 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 357 358 /* inform other processors of number of messages and max length*/ 359 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 360 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 361 nreceives = work[rank]; 362 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 363 nmax = work[rank]; 364 PetscFree(work); 365 366 /* post receives: 367 1) each message will consist of ordered pairs 368 (global index,value) we store the global index as a double 369 to simplify the message passing. 370 2) since we don't know how long each individual message is we 371 allocate the largest needed buffer for each receive. Potentially 372 this is a lot of wasted space. 373 374 375 This could be done better. 
376 */ 377 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 378 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 379 for ( i=0; i<nreceives; i++ ) { 380 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 381 comm,recv_waits+i);CHKERRQ(ierr); 382 } 383 384 /* do sends: 385 1) starts[i] gives the starting index in svalues for stuff going to 386 the ith processor 387 */ 388 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 389 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 390 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 391 starts[0] = 0; 392 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 393 for ( i=0; i<aij->stash.n; i++ ) { 394 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 395 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 396 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 397 } 398 PetscFree(owner); 399 starts[0] = 0; 400 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 401 count = 0; 402 for ( i=0; i<size; i++ ) { 403 if (procs[i]) { 404 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 PetscFree(starts); 408 PetscFree(nprocs); 409 410 /* Free cache space */ 411 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 412 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 413 414 aij->svalues = svalues; aij->rvalues = rvalues; 415 aij->nsends = nsends; aij->nrecvs = nreceives; 416 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 417 aij->rmax = nmax; 418 419 PetscFunctionReturn(0); 420 } 421 extern int MatSetUpMultiply_MPIAIJ(Mat); 422 423 #undef __FUNC__ 424 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 425 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType 
mode) 426 { 427 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 428 MPI_Status *send_status,recv_status; 429 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 430 int row,col,other_disassembled; 431 Scalar *values,val; 432 InsertMode addv = mat->insertmode; 433 434 PetscFunctionBegin; 435 436 /* wait on receives */ 437 while (count) { 438 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 439 /* unpack receives into our local space */ 440 values = aij->rvalues + 3*imdex*aij->rmax; 441 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 442 n = n/3; 443 for ( i=0; i<n; i++ ) { 444 row = (int) PetscReal(values[3*i]) - aij->rstart; 445 col = (int) PetscReal(values[3*i+1]); 446 val = values[3*i+2]; 447 if (col >= aij->cstart && col < aij->cend) { 448 col -= aij->cstart; 449 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 450 } else { 451 if (mat->was_assembled || mat->assembled) { 452 if (!aij->colmap) { 453 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 454 } 455 #if defined (USE_CTABLE) 456 col = TableFind( aij->colmap, col + 1 ) - 1; 457 #else 458 col = aij->colmap[col] - 1; 459 #endif 460 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 461 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 462 col = (int) PetscReal(values[3*i+1]); 463 } 464 } 465 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 466 } 467 } 468 count--; 469 } 470 if (aij->recv_waits) PetscFree(aij->recv_waits); 471 if (aij->rvalues) PetscFree(aij->rvalues); 472 473 /* wait on sends */ 474 if (aij->nsends) { 475 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 476 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 477 PetscFree(send_status); 478 } 479 if (aij->send_waits) PetscFree(aij->send_waits); 480 if (aij->svalues) PetscFree(aij->svalues); 481 482 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 483 ierr = 
MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 484 485 /* determine if any processor has disassembled, if so we must 486 also disassemble ourselfs, in order that we may reassemble. */ 487 /* 488 if nonzero structure of submatrix B cannot change then we know that 489 no processor disassembled thus we can skip this stuff 490 */ 491 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 492 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 493 if (mat->was_assembled && !other_disassembled) { 494 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 495 } 496 } 497 498 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 499 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 500 } 501 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 502 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 503 504 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNC__ 509 #define __FUNC__ "MatZeroEntries_MPIAIJ" 510 int MatZeroEntries_MPIAIJ(Mat A) 511 { 512 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 513 int ierr; 514 515 PetscFunctionBegin; 516 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 517 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNC__ 522 #define __FUNC__ "MatZeroRows_MPIAIJ" 523 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 524 { 525 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 526 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 527 int *procs,*nprocs,j,found,idx,nsends,*work,row; 528 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 529 int *rvalues,tag = A->tag,count,base,slen,n,*source; 530 int *lens,imdex,*lrows,*values,rstart=l->rstart; 531 MPI_Comm comm = A->comm; 532 MPI_Request *send_waits,*recv_waits; 533 MPI_Status recv_status,*send_status; 534 IS istmp; 535 PetscTruth localdiag; 536 537 PetscFunctionBegin; 538 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 539 ierr = ISGetIndices(is,&rows); 
CHKERRQ(ierr); 540 541 /* first count number of contributors to each processor */ 542 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 543 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 544 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 545 for ( i=0; i<N; i++ ) { 546 idx = rows[i]; 547 found = 0; 548 for ( j=0; j<size; j++ ) { 549 if (idx >= owners[j] && idx < owners[j+1]) { 550 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 551 } 552 } 553 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 554 } 555 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 556 557 /* inform other processors of number of messages and max length*/ 558 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 559 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 560 nrecvs = work[rank]; 561 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 562 nmax = work[rank]; 563 PetscFree(work); 564 565 /* post receives: */ 566 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 567 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 568 for ( i=0; i<nrecvs; i++ ) { 569 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 570 } 571 572 /* do sends: 573 1) starts[i] gives the starting index in svalues for stuff going to 574 the ith processor 575 */ 576 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 577 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 578 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 579 starts[0] = 0; 580 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 581 for ( i=0; i<N; i++ ) { 582 svalues[starts[owner[i]]++] = rows[i]; 583 } 584 ISRestoreIndices(is,&rows); 585 586 starts[0] = 0; 587 for ( i=1; 
i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 588 count = 0; 589 for ( i=0; i<size; i++ ) { 590 if (procs[i]) { 591 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 592 } 593 } 594 PetscFree(starts); 595 596 base = owners[rank]; 597 598 /* wait on receives */ 599 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 600 source = lens + nrecvs; 601 count = nrecvs; slen = 0; 602 while (count) { 603 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 604 /* unpack receives into our local space */ 605 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 606 source[imdex] = recv_status.MPI_SOURCE; 607 lens[imdex] = n; 608 slen += n; 609 count--; 610 } 611 PetscFree(recv_waits); 612 613 /* move the data into the send scatter */ 614 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 615 count = 0; 616 for ( i=0; i<nrecvs; i++ ) { 617 values = rvalues + i*nmax; 618 for ( j=0; j<lens[i]; j++ ) { 619 lrows[count++] = values[j] - base; 620 } 621 } 622 PetscFree(rvalues); PetscFree(lens); 623 PetscFree(owner); PetscFree(nprocs); 624 625 /* actually zap the local rows */ 626 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 627 PLogObjectParent(A,istmp); 628 629 /* 630 Zero the required rows. If the "diagonal block" of the matrix 631 is square and the user wishes to set the diagonal we use seperate 632 code so that MatSetValues() is not called for each diagonal allocating 633 new memory, thus calling lots of mallocs and slowing things down. 
634 635 Contributed by: Mathew Knepley 636 */ 637 localdiag = PETSC_FALSE; 638 if (diag && (l->A->M == l->A->N)) { 639 localdiag = PETSC_TRUE; 640 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 641 } else { 642 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 643 } 644 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 645 ierr = ISDestroy(istmp); CHKERRQ(ierr); 646 647 if (diag && (localdiag == PETSC_FALSE)) { 648 for ( i = 0; i < slen; i++ ) { 649 row = lrows[i] + rstart; 650 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 651 } 652 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 653 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 654 } 655 656 if (diag) { 657 for ( i = 0; i < slen; i++ ) { 658 row = lrows[i] + rstart; 659 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 660 } 661 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 662 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 663 } 664 PetscFree(lrows); 665 666 /* wait on sends */ 667 if (nsends) { 668 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 669 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 670 PetscFree(send_status); 671 } 672 PetscFree(send_waits); PetscFree(svalues); 673 674 PetscFunctionReturn(0); 675 } 676 677 #undef __FUNC__ 678 #define __FUNC__ "MatMult_MPIAIJ" 679 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 680 { 681 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 682 int ierr,nt; 683 684 PetscFunctionBegin; 685 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 686 if (nt != a->n) { 687 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 688 } 689 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 690 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 691 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 692 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 #undef __FUNC__ 697 #define __FUNC__ "MatMultAdd_MPIAIJ" 698 int 
MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 699 { 700 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 701 int ierr; 702 703 PetscFunctionBegin; 704 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 705 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 706 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 707 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNC__ 712 #define __FUNC__ "MatMultTrans_MPIAIJ" 713 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 716 int ierr; 717 718 PetscFunctionBegin; 719 /* do nondiagonal part */ 720 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 721 /* send it on its way */ 722 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 723 /* do local part */ 724 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 725 /* receive remote parts: note this assumes the values are not actually */ 726 /* inserted in yy until the next line, which is true for my implementation*/ 727 /* but is not perhaps always true. 
*/ 728 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNC__ 733 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 734 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 735 { 736 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 737 int ierr; 738 739 PetscFunctionBegin; 740 /* do nondiagonal part */ 741 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 742 /* send it on its way */ 743 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 744 /* do local part */ 745 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 746 /* receive remote parts: note this assumes the values are not actually */ 747 /* inserted in yy until the next line, which is true for my implementation*/ 748 /* but is not perhaps always true. */ 749 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 /* 754 This only works correctly for square matrices where the subblock A->A is the 755 diagonal block 756 */ 757 #undef __FUNC__ 758 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 759 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 760 { 761 int ierr; 762 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 763 764 PetscFunctionBegin; 765 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 766 if (a->rstart != a->cstart || a->rend != a->cend) { 767 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 768 } 769 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNC__ 774 #define __FUNC__ "MatScale_MPIAIJ" 775 int MatScale_MPIAIJ(Scalar *aa,Mat A) 776 { 777 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 778 int ierr; 779 780 PetscFunctionBegin; 781 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 782 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNC__ 787 #define __FUNC__ 
"MatDestroy_MPIAIJ" 788 int MatDestroy_MPIAIJ(Mat mat) 789 { 790 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 791 int ierr; 792 793 PetscFunctionBegin; 794 if (--mat->refct > 0) PetscFunctionReturn(0); 795 796 if (mat->mapping) { 797 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 798 } 799 if (mat->bmapping) { 800 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 801 } 802 if (mat->rmap) { 803 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 804 } 805 if (mat->cmap) { 806 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 807 } 808 #if defined(USE_PETSC_LOG) 809 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 810 #endif 811 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 812 PetscFree(aij->rowners); 813 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 814 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 815 #if defined (USE_CTABLE) 816 if (aij->colmap) TableDelete(aij->colmap); 817 #else 818 if (aij->colmap) PetscFree(aij->colmap); 819 #endif 820 if (aij->garray) PetscFree(aij->garray); 821 if (aij->lvec) VecDestroy(aij->lvec); 822 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 823 if (aij->rowvalues) PetscFree(aij->rowvalues); 824 PetscFree(aij); 825 PLogObjectDestroy(mat); 826 PetscHeaderDestroy(mat); 827 PetscFunctionReturn(0); 828 } 829 830 #undef __FUNC__ 831 #define __FUNC__ "MatView_MPIAIJ_Binary" 832 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 833 { 834 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 835 int ierr; 836 837 PetscFunctionBegin; 838 if (aij->size == 1) { 839 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 840 } 841 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 842 PetscFunctionReturn(0); 843 } 844 845 #undef __FUNC__ 846 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 847 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 848 { 849 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 850 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 851 int ierr, format,shift = 
C->indexshift,rank = aij->rank ; 852 int size = aij->size; 853 FILE *fd; 854 ViewerType vtype; 855 856 PetscFunctionBegin; 857 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 858 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 859 ierr = ViewerGetFormat(viewer,&format); 860 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 861 MatInfo info; 862 int flg; 863 MPI_Comm_rank(mat->comm,&rank); 864 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 865 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 866 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 867 PetscSequentialPhaseBegin(mat->comm,1); 868 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 869 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 870 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 871 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 872 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 873 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 874 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 875 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 876 fflush(fd); 877 PetscSequentialPhaseEnd(mat->comm,1); 878 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 879 PetscFunctionReturn(0); 880 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 881 PetscFunctionReturn(0); 882 } 883 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 884 Draw draw; 885 PetscTruth isnull; 886 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 887 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 888 } 889 890 if (size == 1) { 891 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 892 } else { 893 /* assemble the entire matrix onto first processor. 
*/ 894 Mat A; 895 Mat_SeqAIJ *Aloc; 896 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 897 Scalar *a; 898 899 if (!rank) { 900 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 901 } else { 902 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 903 } 904 PLogObjectParent(mat,A); 905 906 /* copy over the A part */ 907 Aloc = (Mat_SeqAIJ*) aij->A->data; 908 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 909 row = aij->rstart; 910 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 911 for ( i=0; i<m; i++ ) { 912 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 913 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 914 } 915 aj = Aloc->j; 916 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 917 918 /* copy over the B part */ 919 Aloc = (Mat_SeqAIJ*) aij->B->data; 920 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 921 row = aij->rstart; 922 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 923 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 924 for ( i=0; i<m; i++ ) { 925 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 926 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 927 } 928 PetscFree(ct); 929 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 930 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 931 /* 932 Everyone has to call to draw the matrix since the graphics waits are 933 synchronized across all processors that share the Draw object 934 */ 935 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 936 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 937 } 938 ierr = MatDestroy(A); CHKERRQ(ierr); 939 } 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNC__ 944 #define __FUNC__ "MatView_MPIAIJ" 945 int MatView_MPIAIJ(Mat mat,Viewer viewer) 946 { 947 int ierr; 948 ViewerType vtype; 949 950 
PetscFunctionBegin; 951 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 952 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 953 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 954 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 955 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 956 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 957 } else { 958 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 959 } 960 PetscFunctionReturn(0); 961 } 962 963 /* 964 This has to provide several versions. 965 966 2) a) use only local smoothing updating outer values only once. 967 b) local smoothing updating outer values each inner iteration 968 3) color updating out values betwen colors. 969 */ 970 #undef __FUNC__ 971 #define __FUNC__ "MatRelax_MPIAIJ" 972 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 973 double fshift,int its,Vec xx) 974 { 975 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 976 Mat AA = mat->A, BB = mat->B; 977 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 978 Scalar *b,*x,*xs,*ls,d,*v,sum; 979 int ierr,*idx, *diag; 980 int n = mat->n, m = mat->m, i,shift = A->indexshift; 981 982 PetscFunctionBegin; 983 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 984 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 985 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 986 xs = x + shift; /* shift by one for index start of 1 */ 987 ls = ls + shift; 988 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 989 diag = A->diag; 990 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 991 if (flag & SOR_ZERO_INITIAL_GUESS) { 992 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 993 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 994 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 995 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 ierr = 
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 999 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1000 while (its--) { 1001 /* go down through the rows */ 1002 for ( i=0; i<m; i++ ) { 1003 n = A->i[i+1] - A->i[i]; 1004 PLogFlops(4*n+3); 1005 idx = A->j + A->i[i] + shift; 1006 v = A->a + A->i[i] + shift; 1007 sum = b[i]; 1008 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1009 d = fshift + A->a[diag[i]+shift]; 1010 n = B->i[i+1] - B->i[i]; 1011 idx = B->j + B->i[i] + shift; 1012 v = B->a + B->i[i] + shift; 1013 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1014 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1015 } 1016 /* come up through the rows */ 1017 for ( i=m-1; i>-1; i-- ) { 1018 n = A->i[i+1] - A->i[i]; 1019 PLogFlops(4*n+3) 1020 idx = A->j + A->i[i] + shift; 1021 v = A->a + A->i[i] + shift; 1022 sum = b[i]; 1023 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024 d = fshift + A->a[diag[i]+shift]; 1025 n = B->i[i+1] - B->i[i]; 1026 idx = B->j + B->i[i] + shift; 1027 v = B->a + B->i[i] + shift; 1028 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1029 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030 } 1031 } 1032 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1036 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1037 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 while (its--) { 1043 for ( i=0; i<m; i++ ) { 1044 n = A->i[i+1] - A->i[i]; 1045 PLogFlops(4*n+3); 1046 idx = A->j + A->i[i] + shift; 1047 v = A->a + A->i[i] + shift; 1048 sum = b[i]; 1049 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1050 d = fshift + A->a[diag[i]+shift]; 1051 n = B->i[i+1] - B->i[i]; 1052 idx = B->j + B->i[i] + shift; 1053 v = B->a + B->i[i] + shift; 1054 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1055 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1056 } 1057 } 1058 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1059 if (flag & SOR_ZERO_INITIAL_GUESS) { 1060 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1062 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1063 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 while (its--) { 1069 for ( i=m-1; i>-1; i-- ) { 1070 n = A->i[i+1] - A->i[i]; 1071 PLogFlops(4*n+3); 1072 idx = A->j + A->i[i] + shift; 1073 v = A->a + A->i[i] + shift; 1074 sum = b[i]; 1075 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1076 d = fshift + A->a[diag[i]+shift]; 1077 n = B->i[i+1] - B->i[i]; 1078 idx = B->j + B->i[i] + shift; 1079 v = B->a + B->i[i] + shift; 1080 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1081 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1082 } 1083 } 1084 } else { 1085 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1086 } 1087 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1088 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1089 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNC__ 1094 #define __FUNC__ "MatGetInfo_MPIAIJ" 1095 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1096 { 1097 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1098 Mat A = mat->A, B = mat->B; 1099 int ierr; 1100 double isend[5], irecv[5]; 1101 1102 PetscFunctionBegin; 1103 info->block_size = 1.0; 1104 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1105 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1106 isend[3] = info->memory; isend[4] = info->mallocs; 1107 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1108 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1109 isend[3] += info->memory; isend[4] += info->mallocs; 1110 if (flag == MAT_LOCAL) { 1111 info->nz_used = isend[0]; 1112 info->nz_allocated = isend[1]; 1113 info->nz_unneeded = isend[2]; 1114 info->memory = isend[3]; 1115 info->mallocs = isend[4]; 1116 } else if (flag == MAT_GLOBAL_MAX) { 1117 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1118 info->nz_used = irecv[0]; 1119 info->nz_allocated = irecv[1]; 1120 info->nz_unneeded = irecv[2]; 1121 info->memory = irecv[3]; 1122 info->mallocs = irecv[4]; 1123 } else if (flag == MAT_GLOBAL_SUM) { 1124 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1125 info->nz_used = irecv[0]; 1126 info->nz_allocated = irecv[1]; 1127 info->nz_unneeded = irecv[2]; 1128 info->memory = irecv[3]; 1129 info->mallocs = irecv[4]; 1130 } 1131 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1132 info->fill_ratio_needed = 0; 1133 
info->factor_mallocs = 0; 1134 info->rows_global = (double)mat->M; 1135 info->columns_global = (double)mat->N; 1136 info->rows_local = (double)mat->m; 1137 info->columns_local = (double)mat->N; 1138 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNC__ 1143 #define __FUNC__ "MatSetOption_MPIAIJ" 1144 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1147 1148 PetscFunctionBegin; 1149 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1150 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1151 op == MAT_COLUMNS_UNSORTED || 1152 op == MAT_COLUMNS_SORTED || 1153 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1154 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1155 MatSetOption(a->A,op); 1156 MatSetOption(a->B,op); 1157 } else if (op == MAT_ROW_ORIENTED) { 1158 a->roworiented = 1; 1159 MatSetOption(a->A,op); 1160 MatSetOption(a->B,op); 1161 } else if (op == MAT_ROWS_SORTED || 1162 op == MAT_ROWS_UNSORTED || 1163 op == MAT_SYMMETRIC || 1164 op == MAT_STRUCTURALLY_SYMMETRIC || 1165 op == MAT_YES_NEW_DIAGONALS) 1166 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1167 else if (op == MAT_COLUMN_ORIENTED) { 1168 a->roworiented = 0; 1169 MatSetOption(a->A,op); 1170 MatSetOption(a->B,op); 1171 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1172 a->donotstash = 1; 1173 } else if (op == MAT_NO_NEW_DIAGONALS){ 1174 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1175 } else { 1176 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1177 } 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNC__ 1182 #define __FUNC__ "MatGetSize_MPIAIJ" 1183 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1184 { 1185 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1186 1187 PetscFunctionBegin; 1188 if (m) *m = mat->M; 1189 if (n) *n = mat->N; 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNC__ 1194 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1195 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1196 { 1197 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1198 1199 
PetscFunctionBegin; 1200 if (m) *m = mat->m; 1201 if (n) *n = mat->n; 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1207 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1208 { 1209 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1210 1211 PetscFunctionBegin; 1212 *m = mat->rstart; *n = mat->rend; 1213 PetscFunctionReturn(0); 1214 } 1215 1216 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1217 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1218 1219 #undef __FUNC__ 1220 #define __FUNC__ "MatGetRow_MPIAIJ" 1221 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1222 { 1223 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1224 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1225 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1226 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1227 int *cmap, *idx_p; 1228 1229 PetscFunctionBegin; 1230 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1231 mat->getrowactive = PETSC_TRUE; 1232 1233 if (!mat->rowvalues && (idx || v)) { 1234 /* 1235 allocate enough space to hold information from the longest row. 
1236 */ 1237 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1238 int max = 1,m = mat->m,tmp; 1239 for ( i=0; i<m; i++ ) { 1240 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1241 if (max < tmp) { max = tmp; } 1242 } 1243 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1244 mat->rowindices = (int *) (mat->rowvalues + max); 1245 } 1246 1247 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1248 lrow = row - rstart; 1249 1250 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1251 if (!v) {pvA = 0; pvB = 0;} 1252 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1253 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1254 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1255 nztot = nzA + nzB; 1256 1257 cmap = mat->garray; 1258 if (v || idx) { 1259 if (nztot) { 1260 /* Sort by increasing column numbers, assuming A and B already sorted */ 1261 int imark = -1; 1262 if (v) { 1263 *v = v_p = mat->rowvalues; 1264 for ( i=0; i<nzB; i++ ) { 1265 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1266 else break; 1267 } 1268 imark = i; 1269 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1270 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1271 } 1272 if (idx) { 1273 *idx = idx_p = mat->rowindices; 1274 if (imark > -1) { 1275 for ( i=0; i<imark; i++ ) { 1276 idx_p[i] = cmap[cworkB[i]]; 1277 } 1278 } else { 1279 for ( i=0; i<nzB; i++ ) { 1280 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1281 else break; 1282 } 1283 imark = i; 1284 } 1285 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1286 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1287 } 1288 } else { 1289 if (idx) *idx = 0; 1290 if (v) *v = 0; 1291 } 1292 } 1293 *nz = nztot; 1294 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1295 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNC__ 1300 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1301 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1302 { 1303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1304 1305 PetscFunctionBegin; 1306 if (aij->getrowactive == PETSC_FALSE) { 1307 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1308 } 1309 aij->getrowactive = PETSC_FALSE; 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNC__ 1314 #define __FUNC__ "MatNorm_MPIAIJ" 1315 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1316 { 1317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1318 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1319 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1320 double sum = 0.0; 1321 Scalar *v; 1322 1323 PetscFunctionBegin; 1324 if (aij->size == 1) { 1325 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1326 } else { 1327 if (type == NORM_FROBENIUS) { 1328 v = amat->a; 1329 for (i=0; i<amat->nz; i++ ) { 1330 #if defined(USE_PETSC_COMPLEX) 1331 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1332 #else 1333 sum += (*v)*(*v); v++; 1334 #endif 1335 } 1336 v = bmat->a; 1337 for (i=0; i<bmat->nz; i++ ) { 1338 #if defined(USE_PETSC_COMPLEX) 1339 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1340 #else 1341 sum += (*v)*(*v); v++; 1342 #endif 1343 } 1344 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1345 *norm = sqrt(*norm); 1346 } else if (type == NORM_1) { /* max column norm */ 1347 double *tmp, *tmp2; 1348 int *jj, *garray = aij->garray; 1349 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1350 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1351 PetscMemzero(tmp,aij->N*sizeof(double)); 1352 *norm = 0.0; 1353 v = amat->a; jj = amat->j; 1354 for ( j=0; j<amat->nz; j++ ) { 1355 tmp[cstart + *jj++ + 
shift] += PetscAbsScalar(*v); v++; 1356 } 1357 v = bmat->a; jj = bmat->j; 1358 for ( j=0; j<bmat->nz; j++ ) { 1359 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1360 } 1361 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1362 for ( j=0; j<aij->N; j++ ) { 1363 if (tmp2[j] > *norm) *norm = tmp2[j]; 1364 } 1365 PetscFree(tmp); PetscFree(tmp2); 1366 } else if (type == NORM_INFINITY) { /* max row norm */ 1367 double ntemp = 0.0; 1368 for ( j=0; j<amat->m; j++ ) { 1369 v = amat->a + amat->i[j] + shift; 1370 sum = 0.0; 1371 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1372 sum += PetscAbsScalar(*v); v++; 1373 } 1374 v = bmat->a + bmat->i[j] + shift; 1375 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1376 sum += PetscAbsScalar(*v); v++; 1377 } 1378 if (sum > ntemp) ntemp = sum; 1379 } 1380 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1381 } else { 1382 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1383 } 1384 } 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNC__ 1389 #define __FUNC__ "MatTranspose_MPIAIJ" 1390 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1391 { 1392 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1393 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1394 int ierr,shift = Aloc->indexshift; 1395 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1396 Mat B; 1397 Scalar *array; 1398 1399 PetscFunctionBegin; 1400 if (matout == PETSC_NULL && M != N) { 1401 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1402 } 1403 1404 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1405 1406 /* copy over the A part */ 1407 Aloc = (Mat_SeqAIJ*) a->A->data; 1408 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1409 row = a->rstart; 1410 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1411 for ( i=0; i<m; i++ ) { 1412 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1413 row++; array += 
ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1414 } 1415 aj = Aloc->j; 1416 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1417 1418 /* copy over the B part */ 1419 Aloc = (Mat_SeqAIJ*) a->B->data; 1420 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1421 row = a->rstart; 1422 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1423 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1424 for ( i=0; i<m; i++ ) { 1425 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1426 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1427 } 1428 PetscFree(ct); 1429 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1430 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1431 if (matout != PETSC_NULL) { 1432 *matout = B; 1433 } else { 1434 PetscOps *Abops; 1435 struct _MatOps *Aops; 1436 1437 /* This isn't really an in-place transpose .... but free data structures from a */ 1438 PetscFree(a->rowners); 1439 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1440 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1441 #if defined (USE_CTABLE) 1442 if (a->colmap) TableDelete(a->colmap); 1443 #else 1444 if (a->colmap) PetscFree(a->colmap); 1445 #endif 1446 if (a->garray) PetscFree(a->garray); 1447 if (a->lvec) VecDestroy(a->lvec); 1448 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1449 PetscFree(a); 1450 1451 /* 1452 This is horrible, horrible code. We need to keep the 1453 A pointers for the bops and ops but copy everything 1454 else from C. 
1455 */ 1456 Abops = A->bops; 1457 Aops = A->ops; 1458 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1459 A->bops = Abops; 1460 A->ops = Aops; 1461 PetscHeaderDestroy(B); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNC__ 1467 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1468 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1469 { 1470 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1471 Mat a = aij->A, b = aij->B; 1472 int ierr,s1,s2,s3; 1473 1474 PetscFunctionBegin; 1475 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1476 if (rr) { 1477 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1478 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1479 /* Overlap communication with computation. */ 1480 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1481 } 1482 if (ll) { 1483 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1484 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1485 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1486 } 1487 /* scale the diagonal block */ 1488 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1489 1490 if (rr) { 1491 /* Do a scatter end and then right scale the off-diagonal block */ 1492 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1493 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1494 } 1495 1496 PetscFunctionReturn(0); 1497 } 1498 1499 1500 extern int MatPrintHelp_SeqAIJ(Mat); 1501 #undef __FUNC__ 1502 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1503 int MatPrintHelp_MPIAIJ(Mat A) 1504 { 1505 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1506 int ierr; 1507 1508 PetscFunctionBegin; 1509 if (!a->rank) { 1510 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1511 } 1512 PetscFunctionReturn(0); 1513 } 1514 1515 #undef __FUNC__ 1516 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1517 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1518 { 1519 PetscFunctionBegin; 1520 *bs 
= 1; 1521 PetscFunctionReturn(0); 1522 } 1523 #undef __FUNC__ 1524 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1525 int MatSetUnfactored_MPIAIJ(Mat A) 1526 { 1527 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1528 int ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNC__ 1536 #define __FUNC__ "MatEqual_MPIAIJ" 1537 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1538 { 1539 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1540 Mat a, b, c, d; 1541 PetscTruth flg; 1542 int ierr; 1543 1544 PetscFunctionBegin; 1545 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1546 a = matA->A; b = matA->B; 1547 c = matB->A; d = matB->B; 1548 1549 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1550 if (flg == PETSC_TRUE) { 1551 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1552 } 1553 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNC__ 1558 #define __FUNC__ "MatCopy_MPIAIJ" 1559 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1560 { 1561 int ierr; 1562 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1563 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1564 1565 PetscFunctionBegin; 1566 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1567 /* because of the column compression in the off-processor part of the matrix a->B, 1568 the number of columns in a->B and b->B may be different, hence we cannot call 1569 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1570 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1571 then copying the submatrices */ 1572 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1575 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1581 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1582 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1583 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1584 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1585 1586 /* -------------------------------------------------------------------*/ 1587 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1588 MatGetRow_MPIAIJ, 1589 MatRestoreRow_MPIAIJ, 1590 MatMult_MPIAIJ, 1591 MatMultAdd_MPIAIJ, 1592 MatMultTrans_MPIAIJ, 1593 MatMultTransAdd_MPIAIJ, 1594 0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 0, 1600 MatRelax_MPIAIJ, 1601 MatTranspose_MPIAIJ, 1602 MatGetInfo_MPIAIJ, 1603 MatEqual_MPIAIJ, 1604 MatGetDiagonal_MPIAIJ, 1605 MatDiagonalScale_MPIAIJ, 1606 MatNorm_MPIAIJ, 1607 MatAssemblyBegin_MPIAIJ, 1608 MatAssemblyEnd_MPIAIJ, 1609 0, 1610 MatSetOption_MPIAIJ, 1611 MatZeroEntries_MPIAIJ, 1612 MatZeroRows_MPIAIJ, 1613 0, 1614 0, 1615 0, 1616 0, 1617 MatGetSize_MPIAIJ, 1618 MatGetLocalSize_MPIAIJ, 1619 MatGetOwnershipRange_MPIAIJ, 1620 0, 1621 0, 1622 0, 1623 0, 1624 MatDuplicate_MPIAIJ, 1625 0, 1626 0, 1627 0, 1628 0, 1629 0, 1630 MatGetSubMatrices_MPIAIJ, 1631 MatIncreaseOverlap_MPIAIJ, 1632 MatGetValues_MPIAIJ, 1633 MatCopy_MPIAIJ, 1634 MatPrintHelp_MPIAIJ, 1635 MatScale_MPIAIJ, 1636 0, 1637 0, 1638 0, 1639 MatGetBlockSize_MPIAIJ, 1640 0, 1641 0, 1642 0, 1643 0, 1644 MatFDColoringCreate_MPIAIJ, 1645 0, 1646 MatSetUnfactored_MPIAIJ, 1647 0, 1648 0, 1649 MatGetSubMatrix_MPIAIJ, 1650 0, 1651 0, 1652 
MatGetMaps_Petsc}; 1653 1654 /* ----------------------------------------------------------------------------------------*/ 1655 1656 EXTERN_C_BEGIN 1657 #undef __FUNC__ 1658 #define __FUNC__ "MatStoreValues_MPIAIJ" 1659 int MatStoreValues_MPIAIJ(Mat mat) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1662 int ierr; 1663 1664 PetscFunctionBegin; 1665 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1666 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1667 PetscFunctionReturn(0); 1668 } 1669 EXTERN_C_END 1670 1671 EXTERN_C_BEGIN 1672 #undef __FUNC__ 1673 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1674 int MatRetrieveValues_MPIAIJ(Mat mat) 1675 { 1676 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1677 int ierr; 1678 1679 PetscFunctionBegin; 1680 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1681 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 EXTERN_C_END 1685 1686 #include "pc.h" 1687 EXTERN_C_BEGIN 1688 extern int PCSetUp_BJacobi_AIJ(PC); 1689 EXTERN_C_END 1690 1691 #undef __FUNC__ 1692 #define __FUNC__ "MatCreateMPIAIJ" 1693 /*@C 1694 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1695 (the default parallel PETSc format). For good matrix assembly performance 1696 the user should preallocate the matrix storage by setting the parameters 1697 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1698 performance can be increased by more than a factor of 50. 1699 1700 Collective on MPI_Comm 1701 1702 Input Parameters: 1703 + comm - MPI communicator 1704 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1705 This value should be the same as the local size used in creating the 1706 y vector for the matrix-vector product y = Ax. 1707 . n - This value should be the same as the local size used in creating the 1708 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1709 calculated if N is given) For square matrices n is almost always m. 1710 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1711 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1712 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1713 (same value is used for all local rows) 1714 . d_nnz - array containing the number of nonzeros in the various rows of the 1715 DIAGONAL portion of the local submatrix (possibly different for each row) 1716 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1717 The size of this array is equal to the number of local rows, i.e 'm'. 1718 You must leave room for the diagonal entry even if it is zero. 1719 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1720 submatrix (same value is used for all local rows). 1721 - o_nnz - array containing the number of nonzeros in the various rows of the 1722 OFF-DIAGONAL portion of the local submatrix (possibly different for 1723 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1724 structure. The size of this array is equal to the number 1725 of local rows, i.e 'm'. 1726 1727 Output Parameter: 1728 . A - the matrix 1729 1730 Notes: 1731 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1732 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1733 storage requirements for this matrix. 1734 1735 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1736 processor than it must be used on all processors that share the object for 1737 that argument. 1738 1739 The AIJ format (also called the Yale sparse matrix format or 1740 compressed row storage), is fully compatible with standard Fortran 77 1741 storage. That is, the stored row and column indices can begin at 1742 either one (as in Fortran) or zero. See the users manual for details. 1743 1744 The user MUST specify either the local or global matrix dimensions 1745 (possibly both). 
   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extracting the part corresponding 
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to this processor. This is a square mxm matrix. The remaining portion 
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 

   By default, this format uses inodes (identical nodes) when possible. 
   We search for consecutive rows with the same nonzero structure, thereby 
   reusing matrix information to achieve increased efficiency. 

   Options Database Keys: 
+  -mat_aij_no_inode - Do not use inodes 
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 
.  -mat_aij_oneindex - Internally use indexing starting at 1 
   rather than 0. Note that when calling MatSetValues(), 
   the user still MUST index entries starting at 0! 
-  -mat_mpi - use the parallel matrix data structures even on one processor 
   (defaults to using SeqAIJ format on one processor) 


   Example usage: 

   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Let's assume that proc0 owns 3 rows, 
   proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 
   as follows: 

.vb 
            1  2  0  |  0  3  0  |  0  4 
    Proc0   0  5  6  |  7  0  0  |  8  0 
            9  0 10  | 11  0  0  | 12  0 
    ------------------------------------- 
           13  0 14  | 15 16 17  |  0  0 
    Proc1   0 18  0  | 19 20 21  |  0  0 
            0  0  0  | 22 23  0  | 24  0 
    ------------------------------------- 
    Proc2  25 26 27  |  0  0 28  | 29  0 
           30  0  0  | 31 32 33  |  0 34 
.ve 

   This can be represented as a collection of submatrices as: 

.vb 
      A B C 
      D E F 
      G H I 
.ve 

   Where the submatrices A,B,C are owned by proc0, D,E,F are 
   owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
   The 'M','N' parameters are 8,8, and have the same values on all procs. 

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ 
   matrix, and [DF] as another SeqAIJ matrix. 

   When d_nz, o_nz parameters are specified, d_nz storage elements are 
   allocated for every row of the local diagonal submatrix, and o_nz 
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros 
   per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are: 
.vb 
     proc0 : d_nz = 2, o_nz = 2 
     proc1 : d_nz = 3, o_nz = 2 
     proc2 : d_nz = 1, o_nz = 4 
.ve 
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This 
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 
   for proc2. 
i.e we are using 12+15+10=37 storage locations to store 1832 34 values. 1833 1834 When d_nnz, o_nnz parameters are specified, the storage is specified 1835 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1836 In the above case the values for d_nnz,o_nnz are: 1837 .vb 1838 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1839 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1840 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1841 .ve 1842 Here the space allocated is sum of all the above values i.e 34, and 1843 hence pre-allocation is perfect. 1844 1845 .keywords: matrix, aij, compressed row, sparse, parallel 1846 1847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1848 @*/ 1849 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1850 { 1851 Mat B; 1852 Mat_MPIAIJ *b; 1853 int ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0; 1854 1855 PetscFunctionBegin; 1856 MPI_Comm_size(comm,&size); 1857 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1858 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1859 if (!flag1 && !flag2 && size == 1) { 1860 if (M == PETSC_DECIDE) M = m; 1861 if (N == PETSC_DECIDE) N = n; 1862 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 *A = 0; 1867 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1868 PLogObjectCreate(B); 1869 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1870 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1871 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1872 B->ops->destroy = MatDestroy_MPIAIJ; 1873 B->ops->view = MatView_MPIAIJ; 1874 B->factor = 0; 1875 B->assembled = PETSC_FALSE; 1876 B->mapping = 0; 1877 1878 B->insertmode = NOT_SET_VALUES; 1879 b->size = size; 1880 MPI_Comm_rank(comm,&b->rank); 1881 1882 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1883 
SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1884 } 1885 1886 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1887 work[0] = m; work[1] = n; 1888 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1889 if (M == PETSC_DECIDE) M = sum[0]; 1890 if (N == PETSC_DECIDE) N = sum[1]; 1891 } 1892 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1893 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1894 b->m = m; B->m = m; 1895 b->n = n; B->n = n; 1896 b->N = N; B->N = N; 1897 b->M = M; B->M = M; 1898 1899 /* the information in the maps duplicates the information computed below, eventually 1900 we should remove the duplicate information that is not contained in the maps */ 1901 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1902 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1903 1904 /* build local table of row and column ownerships */ 1905 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1906 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1907 b->cowners = b->rowners + b->size + 2; 1908 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1909 b->rowners[0] = 0; 1910 for ( i=2; i<=b->size; i++ ) { 1911 b->rowners[i] += b->rowners[i-1]; 1912 } 1913 b->rstart = b->rowners[b->rank]; 1914 b->rend = b->rowners[b->rank+1]; 1915 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1916 b->cowners[0] = 0; 1917 for ( i=2; i<=b->size; i++ ) { 1918 b->cowners[i] += b->cowners[i-1]; 1919 } 1920 b->cstart = b->cowners[b->rank]; 1921 b->cend = b->cowners[b->rank+1]; 1922 1923 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1924 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1925 PLogObjectParent(B,b->A); 1926 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1927 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1928 
PLogObjectParent(B,b->B); 1929 1930 /* build cache for off array entries formed */ 1931 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1932 b->donotstash = 0; 1933 b->colmap = 0; 1934 b->garray = 0; 1935 b->roworiented = 1; 1936 1937 /* stuff used for matrix vector multiply */ 1938 b->lvec = 0; 1939 b->Mvctx = 0; 1940 1941 /* stuff for MatGetRow() */ 1942 b->rowindices = 0; 1943 b->rowvalues = 0; 1944 b->getrowactive = PETSC_FALSE; 1945 1946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1947 "MatStoreValues_MPIAIJ", 1948 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1949 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1950 "MatRetrieveValues_MPIAIJ", 1951 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1952 ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 1953 "PCSetUp_BJacobi_AIJ", 1954 (void*)PCSetUp_BJacobi_AIJ);CHKERRQ(ierr); 1955 *A = B; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNC__ 1960 #define __FUNC__ "MatDuplicate_MPIAIJ" 1961 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1962 { 1963 Mat mat; 1964 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1965 int ierr, len=0, flg; 1966 1967 PetscFunctionBegin; 1968 *newmat = 0; 1969 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1970 PLogObjectCreate(mat); 1971 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1972 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1973 mat->ops->destroy = MatDestroy_MPIAIJ; 1974 mat->ops->view = MatView_MPIAIJ; 1975 mat->factor = matin->factor; 1976 mat->assembled = PETSC_TRUE; 1977 1978 a->m = mat->m = oldmat->m; 1979 a->n = mat->n = oldmat->n; 1980 a->M = mat->M = oldmat->M; 1981 a->N = mat->N = oldmat->N; 1982 1983 a->rstart = oldmat->rstart; 1984 a->rend = oldmat->rend; 1985 a->cstart = oldmat->cstart; 1986 a->cend = oldmat->cend; 1987 a->size = oldmat->size; 1988 a->rank = 
oldmat->rank; 1989 mat->insertmode = NOT_SET_VALUES; 1990 a->rowvalues = 0; 1991 a->getrowactive = PETSC_FALSE; 1992 1993 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1994 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1995 a->cowners = a->rowners + a->size + 2; 1996 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1997 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1998 if (oldmat->colmap) { 1999 #if defined (USE_CTABLE) 2000 ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr); 2001 #else 2002 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 2003 PLogObjectMemory(mat,(a->N)*sizeof(int)); 2004 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 2005 #endif 2006 } else a->colmap = 0; 2007 if (oldmat->garray) { 2008 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 2009 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 2010 PLogObjectMemory(mat,len*sizeof(int)); 2011 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2012 } else a->garray = 0; 2013 2014 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2015 PLogObjectParent(mat,a->lvec); 2016 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2017 PLogObjectParent(mat,a->Mvctx); 2018 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2019 PLogObjectParent(mat,a->A); 2020 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2021 PLogObjectParent(mat,a->B); 2022 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2023 if (flg) { 2024 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2025 } 2026 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2027 *newmat = mat; 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #include "sys.h" 2032 2033 #undef __FUNC__ 2034 #define __FUNC__ "MatLoad_MPIAIJ" 2035 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2036 { 2037 Mat A; 2038 Scalar 
               *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i, nz, ierr, j,rstart, rend, fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only rank 0 touches the file; header = [cookie, M, N, format flag] */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: even split, first M%size ranks get
     one extra; rowners[r] becomes the first global row of rank r */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; one allocation holds both
     ourlens (total per-row lengths) and offlens (off-diagonal counts) */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    /* non-root ranks: send args of MPI_Scatterv are ignored */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrix: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily turn ourlens into diagonal-only counts
     for preallocation, then restore the totals for the insertion loops */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix, one global row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values; nz still holds this rank's nonzero count */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    Extracts the parallel submatrix given by isrow/iscol; with
  MAT_REUSE_MATRIX the SeqAIJ intermediate saved as the "SubMatrix"
  composed object on *newmat is refilled instead of rebuilt.
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths (olens shares dlens' allocation) */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* walk the CSR arrays of the sequential submatrix, inserting each local
     row at its global position rstart+i */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; dereference so M
     owns the only reference to Mreuse */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}