1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.277 1999/02/11 20:16:32 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 #if defined (USE_CTABLE) 22 int ierr; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined (USE_CTABLE) 27 ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr); 28 for ( i=0; i<n; i++ ){ 29 ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr); 30 } 31 #else 32 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 33 PLogObjectMemory(mat,aij->N*sizeof(int)); 34 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 35 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 36 #endif 37 PetscFunctionReturn(0); 38 } 39 40 extern int DisAssemble_MPIAIJ(Mat); 41 42 #define CHUNKSIZE 15 43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 44 { \ 45 \ 46 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 47 rmax = aimax[row]; nrow = ailen[row]; \ 48 col1 = col - shift; \ 49 \ 50 low = 0; high = nrow; \ 51 while (high-low > 5) { \ 52 t = (low+high)/2; \ 53 if (rp[t] > col) high = t; \ 54 else low = t; \ 55 } \ 56 for ( _i=0; _i<nrow; _i++ ) { \ 57 if (rp[_i] > col1) break; \ 58 if (rp[_i] == col1) { \ 59 if (addv == ADD_VALUES) ap[_i] += value; \ 60 else ap[_i] = value; \ 61 goto a_noinsert; \ 62 } \ 63 } \ 64 if (nonew == 1) goto a_noinsert; \ 65 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 66 if (nrow >= rmax) { \ 67 /* there is no extra room in row, therefore enlarge */ \ 68 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 69 Scalar *new_a; \ 70 \ 71 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 72 \ 73 /* malloc new storage space */ \ 74 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 75 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 76 new_j = (int *) (new_a + new_nz); \ 77 new_i = new_j + new_nz; \ 78 \ 79 /* copy over old data into new slots */ \ 80 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 81 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 82 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 83 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 84 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 85 len*sizeof(int)); \ 86 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 87 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 88 len*sizeof(Scalar)); \ 89 /* free up old matrix storage */ \ 90 \ 91 PetscFree(a->a); \ 92 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 93 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 94 a->singlemalloc = 1; \ 95 \ 96 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 97 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 98 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 99 a->maxnz += CHUNKSIZE; \ 100 a->reallocs++; \ 101 } \ 102 N = nrow++ - 1; a->nz++; \ 103 /* shift up all the later entries in this row */ \ 104 for ( ii=N; ii>=_i; ii-- ) { \ 105 rp[ii+1] = rp[ii]; \ 106 ap[ii+1] = ap[ii]; \ 107 } \ 108 rp[_i] = col1; \ 109 ap[_i] = value; \ 110 a_noinsert: ; \ 111 ailen[row] = nrow; \ 112 } 113 114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 118 rmax = bimax[row]; nrow = bilen[row]; \ 119 col1 = col - shift; \ 120 \ 121 low = 0; high = nrow; \ 122 while (high-low > 5) { \ 123 t = (low+high)/2; \ 124 if (rp[t] > col) high = t; \ 125 else low = t; \ 126 } \ 127 for ( _i=0; _i<nrow; _i++ ) { \ 128 if (rp[_i] > col1) break; \ 129 if (rp[_i] == col1) { \ 130 if (addv == ADD_VALUES) ap[_i] += value; \ 131 else ap[_i] = value; \ 132 goto b_noinsert; \ 133 } \ 134 } \ 135 if (nonew == 1) goto b_noinsert; \ 136 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 143 \ 144 /* malloc new storage space */ \ 145 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 146 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 147 new_j = (int *) (new_a + new_nz); \ 148 new_i = new_j + new_nz; \ 149 \ 150 /* copy over old data into new slots */ \ 151 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 152 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 153 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 154 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 155 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 156 len*sizeof(int)); \ 157 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 158 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 159 len*sizeof(Scalar)); \ 160 /* free up old matrix storage */ \ 161 \ 162 PetscFree(b->a); \ 163 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = 1; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 169 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for ( ii=N; ii>=_i; ii-- ) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 186 #undef __FUNC__ 187 #define __FUNC__ "MatSetValues_MPIAIJ" 188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 191 Scalar value; 192 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 193 int cstart = aij->cstart, cend = aij->cend,row,col; 194 int roworiented = aij->roworiented; 195 196 /* Some Variables required in the macro */ 197 Mat A = aij->A; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 199 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 200 Scalar *aa = a->a; 201 202 Mat B = aij->B; 203 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 204 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 205 Scalar *ba = b->a; 206 207 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 208 int nonew = a->nonew,shift = a->indexshift; 209 Scalar *ap; 210 211 PetscFunctionBegin; 212 for ( i=0; i<m; i++ ) { 213 if (im[i] < 0) continue; 214 #if defined(USE_PETSC_BOPT_g) 215 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 216 #endif 217 if (im[i] >= rstart && im[i] < rend) { 218 row = im[i] - rstart; 219 for ( j=0; j<n; j++ ) { 220 if (in[j] >= cstart && in[j] < cend){ 221 col = in[j] - cstart; 222 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } 226 else if (in[j] < 0) continue; 227 #if defined(USE_PETSC_BOPT_g) 228 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 229 #endif 230 else { 231 if (mat->was_assembled) { 232 if (!aij->colmap) { 233 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 234 } 235 #if defined (USE_CTABLE) 236 col = TableFind( aij->colmap, in[j] + 1 ) - 1; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ *) B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !aij->donotstash) { 257 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!aij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNC__ 272 #define __FUNC__ "MatGetValues_MPIAIJ" 273 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 276 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 277 int cstart = aij->cstart, cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for ( i=0; i<m; i++ ) { 281 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 282 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for ( j=0; j<n; j++ ) { 286 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 287 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (USE_CTABLE) 296 col = TableFind( aij->colmap, idxn[j] + 1 ) - 1; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 318 MPI_Comm comm = mat->comm; 319 int size = aij->size, *owners = aij->rowners; 320 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 321 MPI_Request *send_waits,*recv_waits; 322 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 323 InsertMode addv; 324 Scalar *rvalues,*svalues; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 aij->svalues = 0; aij->rvalues = 0; 329 aij->nsends = 0; aij->nrecvs = 0; 330 aij->send_waits = 0; aij->recv_waits = 0; 331 aij->rmax = 0; 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 /* first count number of contributors to each processor */ 343 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 344 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 345 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 346 for ( i=0; i<aij->stash.n; i++ ) { 347 idx = aij->stash.idx[i]; 348 for ( j=0; j<size; j++ ) { 349 if (idx >= owners[j] && idx < owners[j+1]) { 350 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 351 } 352 } 353 } 354 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 355 356 /* inform other processors of number of messages and max length*/ 357 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 358 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 359 nreceives = work[rank]; 360 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 361 nmax = work[rank]; 362 PetscFree(work); 363 364 /* post receives: 365 1) each message will consist of ordered pairs 366 (global index,value) we store the global index as a double 367 to simplify the message passing. 368 2) since we don't know how long each individual message is we 369 allocate the largest needed buffer for each receive. Potentially 370 this is a lot of wasted space. 371 372 373 This could be done better. 374 */ 375 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 376 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 377 for ( i=0; i<nreceives; i++ ) { 378 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 379 comm,recv_waits+i);CHKERRQ(ierr); 380 } 381 382 /* do sends: 383 1) starts[i] gives the starting index in svalues for stuff going to 384 the ith processor 385 */ 386 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 387 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 388 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 389 starts[0] = 0; 390 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 391 for ( i=0; i<aij->stash.n; i++ ) { 392 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 393 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 394 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 395 } 396 PetscFree(owner); 397 starts[0] = 0; 398 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 399 count = 0; 400 for ( i=0; i<size; i++ ) { 401 if (procs[i]) { 402 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 403 } 404 } 405 PetscFree(starts); 406 PetscFree(nprocs); 407 408 /* Free cache space */ 409 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 410 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 411 412 aij->svalues = svalues; aij->rvalues = rvalues; 413 aij->nsends = nsends; aij->nrecvs = nreceives; 414 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 415 aij->rmax = nmax; 416 417 PetscFunctionReturn(0); 418 } 419 extern int MatSetUpMultiply_MPIAIJ(Mat); 420 421 #undef __FUNC__ 422 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 423 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 424 { 425 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 426 MPI_Status *send_status,recv_status; 427 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 428 int row,col,other_disassembled; 429 Scalar *values,val; 430 InsertMode addv = mat->insertmode; 431 432 PetscFunctionBegin; 433 434 /* wait on receives */ 435 while (count) { 436 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 437 /* unpack receives into our local space */ 438 values = aij->rvalues + 3*imdex*aij->rmax; 439 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 440 n = n/3; 441 for ( i=0; i<n; i++ ) { 442 row = (int) PetscReal(values[3*i]) - aij->rstart; 443 col = (int) PetscReal(values[3*i+1]); 444 val = values[3*i+2]; 445 if (col >= aij->cstart && col < aij->cend) { 446 col -= aij->cstart; 447 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 448 } else { 449 if (mat->was_assembled || mat->assembled) { 450 if (!aij->colmap) { 451 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 452 } 453 #if defined (USE_CTABLE) 454 col = TableFind( aij->colmap, col + 1 ) - 1; 455 #else 456 col = aij->colmap[col] - 1; 457 #endif 458 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 col = (int) PetscReal(values[3*i+1]); 461 } 462 } 463 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 464 } 465 } 466 count--; 467 } 468 if (aij->recv_waits) PetscFree(aij->recv_waits); 469 if (aij->rvalues) PetscFree(aij->rvalues); 470 471 /* wait on sends */ 472 if (aij->nsends) { 473 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 474 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 475 PetscFree(send_status); 476 } 477 if (aij->send_waits) PetscFree(aij->send_waits); 478 if (aij->svalues) PetscFree(aij->svalues); 479 480 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 481 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 482 483 /* determine if any processor has disassembled, if so we must 484 also disassemble ourselfs, in order that we may reassemble. */ 485 /* 486 if nonzero structure of submatrix B cannot change then we know that 487 no processor disassembled thus we can skip this stuff 488 */ 489 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 490 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 491 if (mat->was_assembled && !other_disassembled) { 492 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 493 } 494 } 495 496 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 497 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 498 } 499 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 500 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 501 502 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNC__ 507 #define __FUNC__ "MatZeroEntries_MPIAIJ" 508 int MatZeroEntries_MPIAIJ(Mat A) 509 { 510 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 511 int ierr; 512 513 PetscFunctionBegin; 514 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 515 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNC__ 520 #define __FUNC__ "MatZeroRows_MPIAIJ" 521 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 522 { 523 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 524 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 525 int *procs,*nprocs,j,found,idx,nsends,*work,row; 526 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 527 int *rvalues,tag = A->tag,count,base,slen,n,*source; 528 int *lens,imdex,*lrows,*values,rstart=l->rstart; 529 MPI_Comm comm = A->comm; 530 MPI_Request *send_waits,*recv_waits; 531 MPI_Status recv_status,*send_status; 532 IS istmp; 533 534 PetscFunctionBegin; 535 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 536 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 537 538 /* first count number of contributors to each processor */ 539 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 540 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 541 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 542 for ( i=0; i<N; i++ ) { 543 idx = rows[i]; 544 found = 0; 545 for ( j=0; j<size; j++ ) { 546 if (idx >= owners[j] && idx < owners[j+1]) { 547 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 548 } 549 } 550 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 551 } 552 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 553 554 /* inform other processors of number of messages and max length*/ 555 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 556 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 557 nrecvs = work[rank]; 558 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 559 nmax = work[rank]; 560 PetscFree(work); 561 562 /* post receives: */ 563 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 564 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 565 for ( i=0; i<nrecvs; i++ ) { 566 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 567 } 568 569 /* do sends: 570 1) starts[i] gives the starting index in svalues for stuff going to 571 the ith processor 572 */ 573 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 574 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 575 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 576 starts[0] = 0; 577 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 578 for ( i=0; i<N; i++ ) { 579 svalues[starts[owner[i]]++] = rows[i]; 580 } 581 ISRestoreIndices(is,&rows); 582 583 starts[0] = 0; 584 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 585 count = 0; 586 for ( i=0; i<size; i++ ) { 587 if (procs[i]) { 588 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 589 } 590 } 591 PetscFree(starts); 592 593 base = owners[rank]; 594 595 /* wait on receives */ 596 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 597 source = lens + nrecvs; 598 count = nrecvs; slen = 0; 599 while (count) { 600 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 601 /* unpack receives into our local space */ 602 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 603 source[imdex] = recv_status.MPI_SOURCE; 604 lens[imdex] = n; 605 slen += n; 606 count--; 607 } 608 PetscFree(recv_waits); 609 610 /* move the data into the send scatter */ 611 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 612 count = 0; 613 for ( i=0; i<nrecvs; i++ ) { 614 values = rvalues + i*nmax; 615 for ( j=0; j<lens[i]; j++ ) { 616 lrows[count++] = values[j] - base; 617 } 618 } 619 PetscFree(rvalues); PetscFree(lens); 620 PetscFree(owner); PetscFree(nprocs); 621 622 /* actually zap the local rows */ 623 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 624 PLogObjectParent(A,istmp); 625 626 /* 627 Zero the required rows. If the "diagonal block" of the matrix 628 is square and the user wishes to set the diagonal we use seperate 629 code so that MatSetValues() is not called for each diagonal allocating 630 new memory, thus calling lots of mallocs and slowing things down. 631 632 Contributed by: Mathew Knepley 633 */ 634 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 635 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 636 if (diag && (l->A->M == l->A->N)) { 637 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 638 } else if (diag) { 639 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 640 for ( i = 0; i < slen; i++ ) { 641 row = lrows[i] + rstart; 642 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 643 } 644 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 645 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 646 } else { 647 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 648 } 649 ierr = ISDestroy(istmp); CHKERRQ(ierr); 650 PetscFree(lrows); 651 652 /* wait on sends */ 653 if (nsends) { 654 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 655 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 656 PetscFree(send_status); 657 } 658 PetscFree(send_waits); PetscFree(svalues); 659 660 PetscFunctionReturn(0); 661 } 662 663 #undef __FUNC__ 664 #define __FUNC__ "MatMult_MPIAIJ" 665 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 666 { 667 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 668 int ierr,nt; 669 670 PetscFunctionBegin; 671 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 672 if (nt != a->n) { 673 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 674 } 675 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 676 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 677 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 678 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNC__ 683 #define __FUNC__ "MatMultAdd_MPIAIJ" 684 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 687 int ierr; 688 689 PetscFunctionBegin; 690 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 691 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 692 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 693 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNC__ 698 #define __FUNC__ "MatMultTrans_MPIAIJ" 699 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 700 { 701 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 702 int ierr; 703 704 PetscFunctionBegin; 705 /* do nondiagonal part */ 706 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 707 /* send it on its way */ 708 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 709 /* do local part */ 710 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 711 /* receive remote parts: note this assumes the values are not actually */ 712 /* inserted in yy until the next line, which is true for my implementation*/ 713 /* but is not perhaps always true. */ 714 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNC__ 719 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 720 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 721 { 722 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 723 int ierr; 724 725 PetscFunctionBegin; 726 /* do nondiagonal part */ 727 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 728 /* send it on its way */ 729 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 730 /* do local part */ 731 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 732 /* receive remote parts: note this assumes the values are not actually */ 733 /* inserted in yy until the next line, which is true for my implementation*/ 734 /* but is not perhaps always true. */ 735 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 /* 740 This only works correctly for square matrices where the subblock A->A is the 741 diagonal block 742 */ 743 #undef __FUNC__ 744 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 745 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 746 { 747 int ierr; 748 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 749 750 PetscFunctionBegin; 751 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 752 if (a->rstart != a->cstart || a->rend != a->cend) { 753 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 754 } 755 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNC__ 760 #define __FUNC__ "MatScale_MPIAIJ" 761 int MatScale_MPIAIJ(Scalar *aa,Mat A) 762 { 763 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 764 int ierr; 765 766 PetscFunctionBegin; 767 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 768 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNC__ 773 #define __FUNC__ "MatDestroy_MPIAIJ" 774 int MatDestroy_MPIAIJ(Mat mat) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 777 int ierr; 778 779 PetscFunctionBegin; 780 if (--mat->refct > 0) PetscFunctionReturn(0); 781 782 if (mat->mapping) { 783 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 784 } 785 if (mat->bmapping) { 786 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 787 } 788 if (mat->rmap) { 789 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 790 } 791 if (mat->cmap) { 792 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 793 } 794 #if defined(USE_PETSC_LOG) 795 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 796 #endif 797 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 798 PetscFree(aij->rowners); 799 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 800 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 801 #if defined (USE_CTABLE) 802 if (aij->colmap) TableDelete(aij->colmap); 803 #else 804 if (aij->colmap) PetscFree(aij->colmap); 805 #endif 806 if (aij->garray) PetscFree(aij->garray); 807 if (aij->lvec) VecDestroy(aij->lvec); 808 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 809 if (aij->rowvalues) PetscFree(aij->rowvalues); 810 PetscFree(aij); 811 PLogObjectDestroy(mat); 812 PetscHeaderDestroy(mat); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNC__ 817 #define __FUNC__ "MatView_MPIAIJ_Binary" 818 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 819 { 820 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 821 int ierr; 822 823 PetscFunctionBegin; 824 if (aij->size == 1) { 825 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 826 } 827 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNC__ 832 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 833 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 834 { 835 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 836 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 837 int ierr, format,shift = C->indexshift,rank = aij->rank ; 838 int size = aij->size; 839 FILE *fd; 840 ViewerType vtype; 841 842 PetscFunctionBegin; 843 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 844 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 845 ierr = ViewerGetFormat(viewer,&format); 846 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 847 MatInfo info; 848 int flg; 849 MPI_Comm_rank(mat->comm,&rank); 850 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 851 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 852 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 853 PetscSequentialPhaseBegin(mat->comm,1); 854 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 855 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 856 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 857 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 858 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 859 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 860 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 861 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 862 fflush(fd); 863 PetscSequentialPhaseEnd(mat->comm,1); 864 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 867 PetscFunctionReturn(0); 868 } 869 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 870 Draw draw; 871 PetscTruth isnull; 872 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 873 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 874 } 875 876 if (size == 1) { 877 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 878 } else { 879 /* assemble the entire matrix onto first processor. */ 880 Mat A; 881 Mat_SeqAIJ *Aloc; 882 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 883 Scalar *a; 884 885 if (!rank) { 886 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 887 } else { 888 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 889 } 890 PLogObjectParent(mat,A); 891 892 /* copy over the A part */ 893 Aloc = (Mat_SeqAIJ*) aij->A->data; 894 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 895 row = aij->rstart; 896 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 897 for ( i=0; i<m; i++ ) { 898 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 899 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 900 } 901 aj = Aloc->j; 902 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 903 904 /* copy over the B part */ 905 Aloc = (Mat_SeqAIJ*) aij->B->data; 906 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 907 row = aij->rstart; 908 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 909 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 910 for ( i=0; i<m; i++ ) { 911 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 912 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 913 } 914 PetscFree(ct); 915 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 916 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 917 /* 918 Everyone has to call to draw the matrix since the graphics waits are 919 synchronized across all processors that share the Draw object 920 */ 921 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 922 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 923 } 924 ierr = MatDestroy(A); CHKERRQ(ierr); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNC__ 930 #define __FUNC__ "MatView_MPIAIJ" 931 int MatView_MPIAIJ(Mat mat,Viewer viewer) 932 { 933 int ierr; 934 ViewerType vtype; 935 936 PetscFunctionBegin; 937 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 938 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 939 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 940 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 941 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 942 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 943 } else { 944 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 945 } 946 PetscFunctionReturn(0); 947 } 948 949 /* 950 This has to provide several versions. 951 952 2) a) use only local smoothing updating outer values only once. 953 b) local smoothing updating outer values each inner iteration 954 3) color updating out values betwen colors. 955 */ 956 #undef __FUNC__ 957 #define __FUNC__ "MatRelax_MPIAIJ" 958 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 959 double fshift,int its,Vec xx) 960 { 961 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 962 Mat AA = mat->A, BB = mat->B; 963 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 964 Scalar *b,*x,*xs,*ls,d,*v,sum; 965 int ierr,*idx, *diag; 966 int n = mat->n, m = mat->m, i,shift = A->indexshift; 967 968 PetscFunctionBegin; 969 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 970 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 971 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 972 xs = x + shift; /* shift by one for index start of 1 */ 973 ls = ls + shift; 974 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 975 diag = A->diag; 976 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 977 if (flag & SOR_ZERO_INITIAL_GUESS) { 978 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 979 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 980 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 981 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 985 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 986 while (its--) { 987 /* go down through the rows */ 988 for ( i=0; i<m; i++ ) { 989 n = A->i[i+1] - A->i[i]; 990 PLogFlops(4*n+3); 991 idx = A->j + A->i[i] + shift; 992 v = A->a + A->i[i] + shift; 993 sum = b[i]; 994 SPARSEDENSEMDOT(sum,xs,v,idx,n); 995 d = fshift + A->a[diag[i]+shift]; 996 n = B->i[i+1] - B->i[i]; 997 idx = B->j + B->i[i] + shift; 998 v = B->a + B->i[i] + shift; 999 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1000 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1001 } 1002 /* come up through the rows */ 1003 for ( i=m-1; i>-1; i-- ) { 1004 n = A->i[i+1] - A->i[i]; 1005 PLogFlops(4*n+3) 1006 idx = A->j + A->i[i] + shift; 1007 v = A->a + A->i[i] + shift; 1008 sum = b[i]; 1009 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1010 d = fshift + A->a[diag[i]+shift]; 1011 n = B->i[i+1] - B->i[i]; 1012 idx = B->j + B->i[i] + shift; 1013 v = B->a + B->i[i] + shift; 1014 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1015 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1016 } 1017 } 1018 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1021 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1022 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1023 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 while (its--) { 1029 for ( i=0; i<m; i++ ) { 1030 n = A->i[i+1] - A->i[i]; 1031 PLogFlops(4*n+3); 1032 idx = A->j + A->i[i] + shift; 1033 v = A->a + A->i[i] + shift; 1034 sum = b[i]; 1035 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1036 d = fshift + A->a[diag[i]+shift]; 1037 n = B->i[i+1] - B->i[i]; 1038 idx = B->j + B->i[i] + shift; 1039 v = B->a + B->i[i] + shift; 1040 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1041 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1042 } 1043 } 1044 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1045 if (flag & SOR_ZERO_INITIAL_GUESS) { 1046 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1048 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1049 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 while (its--) { 1055 for ( i=m-1; i>-1; i-- ) { 1056 n = A->i[i+1] - A->i[i]; 1057 PLogFlops(4*n+3); 1058 idx = A->j + A->i[i] + shift; 1059 v = A->a + A->i[i] + shift; 1060 sum = b[i]; 1061 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1062 d = fshift + A->a[diag[i]+shift]; 1063 n = B->i[i+1] - B->i[i]; 1064 idx = B->j + B->i[i] + shift; 1065 v = B->a + B->i[i] + shift; 1066 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1067 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1068 } 1069 } 1070 } else { 1071 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1072 } 1073 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1075 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNC__ 1080 #define __FUNC__ "MatGetInfo_MPIAIJ" 1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1084 Mat A = mat->A, B = mat->B; 1085 int ierr; 1086 double isend[5], irecv[5]; 1087 1088 PetscFunctionBegin; 1089 info->block_size = 1.0; 1090 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1091 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1092 isend[3] = info->memory; isend[4] = info->mallocs; 1093 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1094 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1095 isend[3] += info->memory; isend[4] += info->mallocs; 1096 if (flag == MAT_LOCAL) { 1097 info->nz_used = isend[0]; 1098 info->nz_allocated = isend[1]; 1099 info->nz_unneeded = isend[2]; 1100 info->memory = isend[3]; 1101 info->mallocs = isend[4]; 1102 } else if (flag == MAT_GLOBAL_MAX) { 1103 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1104 info->nz_used = irecv[0]; 1105 info->nz_allocated = irecv[1]; 1106 info->nz_unneeded = irecv[2]; 1107 info->memory = irecv[3]; 1108 info->mallocs = irecv[4]; 1109 } else if (flag == MAT_GLOBAL_SUM) { 1110 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1111 info->nz_used = irecv[0]; 1112 info->nz_allocated = irecv[1]; 1113 info->nz_unneeded = irecv[2]; 1114 info->memory = irecv[3]; 1115 info->mallocs = irecv[4]; 1116 } 1117 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1118 info->fill_ratio_needed = 0; 1119 info->factor_mallocs = 0; 1120 info->rows_global = (double)mat->M; 1121 info->columns_global = (double)mat->N; 1122 info->rows_local = (double)mat->m; 1123 info->columns_local = (double)mat->N; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNC__ 1129 #define __FUNC__ "MatSetOption_MPIAIJ" 1130 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1133 1134 PetscFunctionBegin; 1135 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1136 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1137 op == MAT_COLUMNS_UNSORTED || 1138 op == MAT_COLUMNS_SORTED || 1139 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1140 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1141 MatSetOption(a->A,op); 1142 MatSetOption(a->B,op); 1143 } else if (op == MAT_ROW_ORIENTED) { 1144 a->roworiented = 1; 1145 MatSetOption(a->A,op); 1146 MatSetOption(a->B,op); 1147 } else if (op == MAT_ROWS_SORTED || 1148 op == MAT_ROWS_UNSORTED || 1149 op == MAT_SYMMETRIC || 1150 op == MAT_STRUCTURALLY_SYMMETRIC || 1151 op == MAT_YES_NEW_DIAGONALS) 1152 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1153 else if (op == MAT_COLUMN_ORIENTED) { 1154 a->roworiented = 0; 1155 MatSetOption(a->A,op); 1156 MatSetOption(a->B,op); 1157 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1158 a->donotstash = 1; 1159 } else if (op == MAT_NO_NEW_DIAGONALS){ 1160 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1161 } else { 1162 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1163 } 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNC__ 1168 #define __FUNC__ "MatGetSize_MPIAIJ" 1169 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1170 { 1171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1172 1173 PetscFunctionBegin; 1174 if (m) *m = mat->M; 1175 if (n) *n = mat->N; 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNC__ 1180 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1181 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1182 { 1183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1184 1185 PetscFunctionBegin; 1186 if (m) *m = mat->m; 1187 if (n) *n = mat->n; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNC__ 1192 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1193 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1194 { 1195 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1196 1197 PetscFunctionBegin; 1198 *m = mat->rstart; *n = mat->rend; 1199 PetscFunctionReturn(0); 1200 } 1201 1202 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1203 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1204 1205 #undef __FUNC__ 1206 #define __FUNC__ "MatGetRow_MPIAIJ" 1207 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1208 { 1209 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1210 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1211 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1212 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1213 int *cmap, *idx_p; 1214 1215 PetscFunctionBegin; 1216 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1217 mat->getrowactive = PETSC_TRUE; 1218 1219 if (!mat->rowvalues && (idx || v)) { 1220 /* 1221 allocate enough space to hold information from the longest row. 1222 */ 1223 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1224 int max = 1,m = mat->m,tmp; 1225 for ( i=0; i<m; i++ ) { 1226 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1227 if (max < tmp) { max = tmp; } 1228 } 1229 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1230 mat->rowindices = (int *) (mat->rowvalues + max); 1231 } 1232 1233 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1234 lrow = row - rstart; 1235 1236 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1237 if (!v) {pvA = 0; pvB = 0;} 1238 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1239 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1240 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1241 nztot = nzA + nzB; 1242 1243 cmap = mat->garray; 1244 if (v || idx) { 1245 if (nztot) { 1246 /* Sort by increasing column numbers, assuming A and B already sorted */ 1247 int imark = -1; 1248 if (v) { 1249 *v = v_p = mat->rowvalues; 1250 for ( i=0; i<nzB; i++ ) { 1251 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1252 else break; 1253 } 1254 imark = i; 1255 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1256 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1257 } 1258 if (idx) { 1259 *idx = idx_p = mat->rowindices; 1260 if (imark > -1) { 1261 for ( i=0; i<imark; i++ ) { 1262 idx_p[i] = cmap[cworkB[i]]; 1263 } 1264 } else { 1265 for ( i=0; i<nzB; i++ ) { 1266 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1267 else break; 1268 } 1269 imark = i; 1270 } 1271 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1272 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1273 } 1274 } else { 1275 if (idx) *idx = 0; 1276 if (v) *v = 0; 1277 } 1278 } 1279 *nz = nztot; 1280 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1281 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNC__ 1286 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1287 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1288 { 1289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1290 1291 PetscFunctionBegin; 1292 if (aij->getrowactive == PETSC_FALSE) { 1293 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1294 } 1295 aij->getrowactive = PETSC_FALSE; 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNC__ 1300 #define __FUNC__ "MatNorm_MPIAIJ" 1301 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1302 { 1303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1304 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1305 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1306 double sum = 0.0; 1307 Scalar *v; 1308 1309 PetscFunctionBegin; 1310 if (aij->size == 1) { 1311 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1312 } else { 1313 if (type == NORM_FROBENIUS) { 1314 v = amat->a; 1315 for (i=0; i<amat->nz; i++ ) { 1316 #if defined(USE_PETSC_COMPLEX) 1317 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1318 #else 1319 sum += (*v)*(*v); v++; 1320 #endif 1321 } 1322 v = bmat->a; 1323 for (i=0; i<bmat->nz; i++ ) { 1324 #if defined(USE_PETSC_COMPLEX) 1325 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1326 #else 1327 sum += (*v)*(*v); v++; 1328 #endif 1329 } 1330 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1331 *norm = sqrt(*norm); 1332 } else if (type == NORM_1) { /* max column norm */ 1333 double *tmp, *tmp2; 1334 int *jj, *garray = aij->garray; 1335 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1336 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1337 PetscMemzero(tmp,aij->N*sizeof(double)); 1338 *norm = 0.0; 1339 v = amat->a; jj = amat->j; 1340 for ( j=0; j<amat->nz; j++ ) { 1341 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1342 } 1343 v = bmat->a; jj = bmat->j; 1344 for ( j=0; j<bmat->nz; j++ ) { 1345 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1346 } 1347 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1348 for ( j=0; j<aij->N; j++ ) { 1349 if (tmp2[j] > *norm) *norm = tmp2[j]; 1350 } 1351 PetscFree(tmp); PetscFree(tmp2); 1352 } else if (type == NORM_INFINITY) { /* max row norm */ 1353 double ntemp = 0.0; 1354 for ( j=0; j<amat->m; j++ ) { 1355 v = amat->a + amat->i[j] + shift; 1356 sum = 0.0; 1357 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1358 sum += PetscAbsScalar(*v); v++; 1359 } 1360 v = bmat->a + bmat->i[j] + shift; 1361 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1362 sum += PetscAbsScalar(*v); v++; 1363 } 1364 if (sum > ntemp) ntemp = sum; 1365 } 1366 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1367 } else { 1368 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1369 } 1370 } 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNC__ 1375 #define __FUNC__ "MatTranspose_MPIAIJ" 1376 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1377 { 1378 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1379 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1380 int ierr,shift = Aloc->indexshift; 1381 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1382 Mat B; 1383 Scalar *array; 1384 1385 PetscFunctionBegin; 1386 if (matout == PETSC_NULL && M != N) { 1387 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1388 } 1389 1390 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1391 1392 /* copy over the A part */ 1393 Aloc = (Mat_SeqAIJ*) a->A->data; 1394 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1395 row = a->rstart; 1396 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1397 for ( i=0; i<m; i++ ) { 1398 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1399 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1400 } 1401 aj = Aloc->j; 1402 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1403 1404 /* copy over the B part */ 1405 Aloc = (Mat_SeqAIJ*) a->B->data; 1406 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1407 row = a->rstart; 1408 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1409 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1410 for ( i=0; i<m; i++ ) { 1411 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1412 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1413 } 1414 PetscFree(ct); 1415 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1416 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1417 if (matout != PETSC_NULL) { 1418 *matout = B; 1419 } else { 1420 PetscOps *Abops; 1421 struct _MatOps *Aops; 1422 1423 /* This isn't really an in-place transpose .... but free data structures from a */ 1424 PetscFree(a->rowners); 1425 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1426 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1427 #if defined (USE_CTABLE) 1428 if (a->colmap) TableDelete(a->colmap); 1429 #else 1430 if (a->colmap) PetscFree(a->colmap); 1431 #endif 1432 if (a->garray) PetscFree(a->garray); 1433 if (a->lvec) VecDestroy(a->lvec); 1434 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1435 PetscFree(a); 1436 1437 /* 1438 This is horrible, horrible code. We need to keep the 1439 A pointers for the bops and ops but copy everything 1440 else from C. 1441 */ 1442 Abops = A->bops; 1443 Aops = A->ops; 1444 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1445 A->bops = Abops; 1446 A->ops = Aops; 1447 PetscHeaderDestroy(B); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNC__ 1453 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1454 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1455 { 1456 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1457 Mat a = aij->A, b = aij->B; 1458 int ierr,s1,s2,s3; 1459 1460 PetscFunctionBegin; 1461 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1462 if (rr) { 1463 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1464 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1465 /* Overlap communication with computation. */ 1466 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1467 } 1468 if (ll) { 1469 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1470 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1471 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1472 } 1473 /* scale the diagonal block */ 1474 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1475 1476 if (rr) { 1477 /* Do a scatter end and then right scale the off-diagonal block */ 1478 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1479 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1480 } 1481 1482 PetscFunctionReturn(0); 1483 } 1484 1485 1486 extern int MatPrintHelp_SeqAIJ(Mat); 1487 #undef __FUNC__ 1488 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1489 int MatPrintHelp_MPIAIJ(Mat A) 1490 { 1491 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1492 int ierr; 1493 1494 PetscFunctionBegin; 1495 if (!a->rank) { 1496 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1497 } 1498 PetscFunctionReturn(0); 1499 } 1500 1501 #undef __FUNC__ 1502 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1503 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1504 { 1505 PetscFunctionBegin; 1506 *bs = 1; 1507 PetscFunctionReturn(0); 1508 } 1509 #undef __FUNC__ 1510 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1511 int MatSetUnfactored_MPIAIJ(Mat A) 1512 { 1513 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1514 int ierr; 1515 1516 PetscFunctionBegin; 1517 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNC__ 1522 #define __FUNC__ "MatEqual_MPIAIJ" 1523 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1524 { 1525 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1526 Mat a, b, c, d; 1527 PetscTruth flg; 1528 int ierr; 1529 1530 PetscFunctionBegin; 1531 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1532 a = matA->A; b = matA->B; 1533 c = matB->A; d = matB->B; 1534 1535 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1536 if (flg == PETSC_TRUE) { 1537 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1538 } 1539 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNC__ 1544 #define __FUNC__ "MatCopy_MPIAIJ" 1545 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1546 { 1547 int ierr; 1548 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1549 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1550 1551 PetscFunctionBegin; 1552 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1553 /* because of the column compression in the off-processor part of the matrix a->B, 1554 the number of columns in a->B and b->B may be different, hence we cannot call 1555 the MatCopy() directly on the two parts. If need be, we can provide a more 1556 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1557 then copying the submatrices */ 1558 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1559 } else { 1560 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1561 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1567 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1568 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1569 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1570 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1571 1572 /* -------------------------------------------------------------------*/ 1573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1574 MatGetRow_MPIAIJ, 1575 MatRestoreRow_MPIAIJ, 1576 MatMult_MPIAIJ, 1577 MatMultAdd_MPIAIJ, 1578 MatMultTrans_MPIAIJ, 1579 MatMultTransAdd_MPIAIJ, 1580 0, 1581 0, 1582 0, 1583 0, 1584 0, 1585 0, 1586 MatRelax_MPIAIJ, 1587 MatTranspose_MPIAIJ, 1588 MatGetInfo_MPIAIJ, 1589 MatEqual_MPIAIJ, 1590 MatGetDiagonal_MPIAIJ, 1591 MatDiagonalScale_MPIAIJ, 1592 MatNorm_MPIAIJ, 1593 MatAssemblyBegin_MPIAIJ, 1594 MatAssemblyEnd_MPIAIJ, 1595 0, 1596 MatSetOption_MPIAIJ, 1597 MatZeroEntries_MPIAIJ, 1598 MatZeroRows_MPIAIJ, 1599 0, 1600 0, 1601 0, 1602 0, 1603 MatGetSize_MPIAIJ, 1604 MatGetLocalSize_MPIAIJ, 1605 MatGetOwnershipRange_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 MatDuplicate_MPIAIJ, 1611 0, 1612 0, 1613 0, 1614 0, 1615 0, 1616 MatGetSubMatrices_MPIAIJ, 1617 MatIncreaseOverlap_MPIAIJ, 1618 MatGetValues_MPIAIJ, 1619 MatCopy_MPIAIJ, 1620 MatPrintHelp_MPIAIJ, 1621 MatScale_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 MatGetBlockSize_MPIAIJ, 1626 0, 1627 0, 1628 0, 1629 0, 1630 MatFDColoringCreate_MPIAIJ, 1631 0, 1632 MatSetUnfactored_MPIAIJ, 1633 0, 1634 0, 1635 MatGetSubMatrix_MPIAIJ, 1636 0, 1637 0, 1638 MatGetMaps_Petsc}; 1639 1640 /* ----------------------------------------------------------------------------------------*/ 1641 1642 EXTERN_C_BEGIN 1643 #undef __FUNC__ 1644 #define __FUNC__ "MatStoreValues_MPIAIJ" 1645 int MatStoreValues_MPIAIJ(Mat mat) 1646 { 1647 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1648 int ierr; 1649 1650 PetscFunctionBegin; 1651 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1652 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 EXTERN_C_END 1656 1657 EXTERN_C_BEGIN 1658 #undef __FUNC__ 1659 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1660 int MatRetrieveValues_MPIAIJ(Mat mat) 1661 { 1662 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1663 int ierr; 1664 1665 PetscFunctionBegin; 1666 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1667 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1668 PetscFunctionReturn(0); 1669 } 1670 EXTERN_C_END 1671 1672 #include "pc.h" 1673 EXTERN_C_BEGIN 1674 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1675 EXTERN_C_END 1676 1677 #undef __FUNC__ 1678 #define __FUNC__ "MatCreateMPIAIJ" 1679 /*@C 1680 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1681 (the default parallel PETSc format). For good matrix assembly performance 1682 the user should preallocate the matrix storage by setting the parameters 1683 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1684 performance can be increased by more than a factor of 50. 1685 1686 Collective on MPI_Comm 1687 1688 Input Parameters: 1689 + comm - MPI communicator 1690 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1691 This value should be the same as the local size used in creating the 1692 y vector for the matrix-vector product y = Ax. 1693 . n - This value should be the same as the local size used in creating the 1694 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1695 calculated if N is given) For square matrices n is almost always m. 1696 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1697 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1698 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1699 (same value is used for all local rows) 1700 . d_nnz - array containing the number of nonzeros in the various rows of the 1701 DIAGONAL portion of the local submatrix (possibly different for each row) 1702 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1703 The size of this array is equal to the number of local rows, i.e 'm'. 1704 You must leave room for the diagonal entry even if it is zero. 1705 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1706 submatrix (same value is used for all local rows). 1707 - o_nnz - array containing the number of nonzeros in the various rows of the 1708 OFF-DIAGONAL portion of the local submatrix (possibly different for 1709 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1710 structure. The size of this array is equal to the number 1711 of local rows, i.e 'm'. 1712 1713 Output Parameter: 1714 . A - the matrix 1715 1716 Notes: 1717 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1718 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1719 storage requirements for this matrix. 1720 1721 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1722 processor than it must be used on all processors that share the object for 1723 that argument. 1724 1725 The AIJ format (also called the Yale sparse matrix format or 1726 compressed row storage), is fully compatible with standard Fortran 77 1727 storage. That is, the stored row and column indices can begin at 1728 either one (as in Fortran) or zero. See the users manual for details. 1729 1730 The user MUST specify either the local or global matrix dimensions 1731 (possibly both). 1732 1733 The parallel matrix is partitioned such that the first m0 rows belong to 1734 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1735 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1736 1737 The DIAGONAL portion of the local submatrix of a processor can be defined 1738 as the submatrix which is obtained by extraction the part corresponding 1739 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1740 first row that belongs to the processor, and r2 is the last row belonging 1741 to the this processor. This is a square mxm matrix. The remaining portion 1742 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1743 1744 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1745 1746 By default, this format uses inodes (identical nodes) when possible. 1747 We search for consecutive rows with the same nonzero structure, thereby 1748 reusing matrix information to achieve increased efficiency. 1749 1750 Options Database Keys: 1751 + -mat_aij_no_inode - Do not use inodes 1752 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1753 - -mat_aij_oneindex - Internally use indexing starting at 1 1754 rather than 0. Note that when calling MatSetValues(), 1755 the user still MUST index entries starting at 0! 1756 . -mat_mpi - use the parallel matrix data structures even on one processor 1757 (defaults to using SeqBAIJ format on one processor) 1758 . -mat_mpi - use the parallel matrix data structures even on one processor 1759 (defaults to using SeqAIJ format on one processor) 1760 1761 1762 Example usage: 1763 1764 Consider the following 8x8 matrix with 34 non-zero values, that is 1765 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1766 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1767 as follows: 1768 1769 .vb 1770 1 2 0 | 0 3 0 | 0 4 1771 Proc0 0 5 6 | 7 0 0 | 8 0 1772 9 0 10 | 11 0 0 | 12 0 1773 ------------------------------------- 1774 13 0 14 | 15 16 17 | 0 0 1775 Proc1 0 18 0 | 19 20 21 | 0 0 1776 0 0 0 | 22 23 0 | 24 0 1777 ------------------------------------- 1778 Proc2 25 26 27 | 0 0 28 | 29 0 1779 30 0 0 | 31 32 33 | 0 34 1780 .ve 1781 1782 This can be represented as a collection of submatrices as: 1783 1784 .vb 1785 A B C 1786 D E F 1787 G H I 1788 .ve 1789 1790 Where the submatrices A,B,C are owned by proc0, D,E,F are 1791 owned by proc1, G,H,I are owned by proc2. 1792 1793 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1794 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1795 The 'M','N' parameters are 8,8, and have the same values on all procs. 1796 1797 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1798 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1799 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1800 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1801 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1802 matrix, ans [DF] as another SeqAIJ matrix. 1803 1804 When d_nz, o_nz parameters are specified, d_nz storage elements are 1805 allocated for every row of the local diagonal submatrix, and o_nz 1806 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1807 One way to choose d_nz and o_nz is to use the max nonzerors per local 1808 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1809 In this case, the values of d_nz,o_nz are: 1810 .vb 1811 proc0 : dnz = 2, o_nz = 2 1812 proc1 : dnz = 3, o_nz = 2 1813 proc2 : dnz = 1, o_nz = 4 1814 .ve 1815 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1816 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1817 for proc3. i.e we are using 12+15+10=37 storage locations to store 1818 34 values. 1819 1820 When d_nnz, o_nnz parameters are specified, the storage is specified 1821 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1822 In the above case the values for d_nnz,o_nnz are: 1823 .vb 1824 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1825 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1826 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1827 .ve 1828 Here the space allocated is sum of all the above values i.e 34, and 1829 hence pre-allocation is perfect. 1830 1831 Level: intermediate 1832 1833 .keywords: matrix, aij, compressed row, sparse, parallel 1834 1835 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1836 @*/ 1837 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1838 { 1839 Mat B; 1840 Mat_MPIAIJ *b; 1841 int ierr, i,size,flag1 = 0, flag2 = 0; 1842 1843 PetscFunctionBegin; 1844 MPI_Comm_size(comm,&size); 1845 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1846 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1847 if (!flag1 && !flag2 && size == 1) { 1848 if (M == PETSC_DECIDE) M = m; 1849 if (N == PETSC_DECIDE) N = n; 1850 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 *A = 0; 1855 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1856 PLogObjectCreate(B); 1857 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1858 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1859 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1860 B->ops->destroy = MatDestroy_MPIAIJ; 1861 B->ops->view = MatView_MPIAIJ; 1862 B->factor = 0; 1863 B->assembled = PETSC_FALSE; 1864 B->mapping = 0; 1865 1866 B->insertmode = NOT_SET_VALUES; 1867 b->size = size; 1868 MPI_Comm_rank(comm,&b->rank); 1869 1870 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1871 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1872 } 1873 1874 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1875 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1876 b->m = m; B->m = m; 1877 b->n = n; B->n = n; 1878 b->N = N; B->N = N; 1879 b->M = M; B->M = M; 1880 1881 /* the information in the maps duplicates the information computed below, eventually 1882 we should remove the duplicate information that is not contained in the maps */ 1883 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1884 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1885 1886 /* build local table of row and column ownerships */ 1887 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1888 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1889 b->cowners = b->rowners + b->size + 2; 1890 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1891 b->rowners[0] = 0; 1892 for ( i=2; i<=b->size; i++ ) { 1893 b->rowners[i] += b->rowners[i-1]; 1894 } 1895 b->rstart = b->rowners[b->rank]; 1896 b->rend = b->rowners[b->rank+1]; 1897 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1898 b->cowners[0] = 0; 1899 for ( i=2; i<=b->size; i++ ) { 1900 b->cowners[i] += b->cowners[i-1]; 1901 } 1902 b->cstart = b->cowners[b->rank]; 1903 b->cend = b->cowners[b->rank+1]; 1904 1905 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1906 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1907 PLogObjectParent(B,b->A); 1908 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1909 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1910 PLogObjectParent(B,b->B); 1911 1912 /* build cache for off array entries formed */ 1913 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1914 b->donotstash = 0; 1915 b->colmap = 0; 1916 b->garray = 0; 1917 b->roworiented = 1; 1918 1919 /* stuff used for matrix vector multiply */ 1920 b->lvec = 0; 1921 b->Mvctx = 0; 1922 1923 /* stuff for MatGetRow() */ 1924 b->rowindices = 0; 1925 b->rowvalues = 0; 1926 b->getrowactive = PETSC_FALSE; 1927 1928 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1929 "MatStoreValues_MPIAIJ", 1930 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1931 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1932 "MatRetrieveValues_MPIAIJ", 1933 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1934 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1935 "MatGetDiagonalBlock_MPIAIJ", 1936 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1937 *A = B; 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNC__ 1942 #define __FUNC__ "MatDuplicate_MPIAIJ" 1943 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1944 { 1945 Mat mat; 1946 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1947 int ierr, len=0, flg; 1948 1949 PetscFunctionBegin; 1950 *newmat = 0; 1951 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1952 PLogObjectCreate(mat); 1953 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1954 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1955 mat->ops->destroy = MatDestroy_MPIAIJ; 1956 mat->ops->view = MatView_MPIAIJ; 1957 mat->factor = matin->factor; 1958 mat->assembled = PETSC_TRUE; 1959 1960 a->m = mat->m = oldmat->m; 1961 a->n = mat->n = oldmat->n; 1962 a->M = mat->M = oldmat->M; 1963 a->N = mat->N = oldmat->N; 1964 1965 a->rstart = oldmat->rstart; 1966 a->rend = oldmat->rend; 1967 a->cstart = oldmat->cstart; 1968 a->cend = oldmat->cend; 1969 a->size = oldmat->size; 1970 a->rank = oldmat->rank; 1971 mat->insertmode = NOT_SET_VALUES; 1972 a->rowvalues = 0; 1973 a->getrowactive = PETSC_FALSE; 1974 1975 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1976 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1977 a->cowners = a->rowners + a->size + 2; 1978 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1979 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1980 if (oldmat->colmap) { 1981 #if defined (USE_CTABLE) 1982 ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr); 1983 #else 1984 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1985 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1986 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1987 #endif 1988 } else a->colmap = 0; 1989 if (oldmat->garray) { 1990 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1991 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1992 PLogObjectMemory(mat,len*sizeof(int)); 1993 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1994 } else a->garray = 0; 1995 1996 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1997 PLogObjectParent(mat,a->lvec); 1998 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1999 PLogObjectParent(mat,a->Mvctx); 2000 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2001 PLogObjectParent(mat,a->A); 2002 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2003 PLogObjectParent(mat,a->B); 2004 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2005 if (flg) { 2006 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2007 } 2008 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2009 *newmat = mat; 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #include "sys.h" 2014 2015 #undef __FUNC__ 2016 #define __FUNC__ "MatLoad_MPIAIJ" 2017 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2018 { 2019 Mat A; 2020 Scalar *vals,*svals; 2021 MPI_Comm comm = ((PetscObject)viewer)->comm; 2022 MPI_Status status; 2023 int i, nz, ierr, j,rstart, rend, fd; 2024 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2025 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2026 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2027 2028 PetscFunctionBegin; 2029 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2030 if (!rank) { 2031 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2032 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2033 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2034 if (header[3] < 0) { 2035 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2036 } 2037 } 2038 2039 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2040 M = header[1]; N = header[2]; 2041 /* determine ownership of all rows */ 2042 m = M/size + ((M % size) > rank); 2043 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2044 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2045 rowners[0] = 0; 2046 for ( i=2; i<=size; i++ ) { 2047 rowners[i] += rowners[i-1]; 2048 } 2049 rstart = rowners[rank]; 2050 rend = rowners[rank+1]; 2051 2052 /* distribute row lengths to all processors */ 2053 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2054 offlens = ourlens + (rend-rstart); 2055 if (!rank) { 2056 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2057 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2058 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2059 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2060 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2061 PetscFree(sndcounts); 2062 } else { 2063 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2064 } 2065 2066 if (!rank) { 2067 /* calculate the number of nonzeros on each processor */ 2068 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2069 PetscMemzero(procsnz,size*sizeof(int)); 2070 for ( i=0; i<size; i++ ) { 2071 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2072 procsnz[i] += rowlengths[j]; 2073 } 2074 } 2075 PetscFree(rowlengths); 2076 2077 /* determine max buffer needed and allocate it */ 2078 maxnz = 0; 2079 for ( i=0; i<size; i++ ) { 2080 maxnz = PetscMax(maxnz,procsnz[i]); 2081 } 2082 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2083 2084 /* read in my part of the matrix column indices */ 2085 nz = procsnz[0]; 2086 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2087 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2088 2089 /* read in every one elses and ship off */ 2090 for ( i=1; i<size; i++ ) { 2091 nz = procsnz[i]; 2092 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2093 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2094 } 2095 PetscFree(cols); 2096 } else { 2097 /* determine buffer space needed for message */ 2098 nz = 0; 2099 for ( i=0; i<m; i++ ) { 2100 nz += ourlens[i]; 2101 } 2102 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2103 2104 /* receive message of column indices*/ 2105 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2106 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2107 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2108 } 2109 2110 /* determine column ownership if matrix is not square */ 2111 if (N != M) { 2112 n = N/size + ((N % size) > rank); 2113 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2114 cstart = cend - n; 2115 } else { 2116 cstart = rstart; 2117 cend = rend; 2118 n = cend - cstart; 2119 } 2120 2121 /* loop over local rows, determining number of off diagonal entries */ 2122 PetscMemzero(offlens,m*sizeof(int)); 2123 jj = 0; 2124 for ( i=0; i<m; i++ ) { 2125 for ( j=0; j<ourlens[i]; j++ ) { 2126 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2127 jj++; 2128 } 2129 } 2130 2131 /* create our matrix */ 2132 for ( i=0; i<m; i++ ) { 2133 ourlens[i] -= offlens[i]; 2134 } 2135 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2136 A = *newmat; 2137 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2138 for ( i=0; i<m; i++ ) { 2139 ourlens[i] += offlens[i]; 2140 } 2141 2142 if (!rank) { 2143 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2144 2145 /* read in my part of the matrix numerical values */ 2146 nz = procsnz[0]; 2147 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2148 2149 /* insert into matrix */ 2150 jj = rstart; 2151 smycols = mycols; 2152 svals = vals; 2153 for ( i=0; i<m; i++ ) { 2154 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2155 smycols += ourlens[i]; 2156 svals += ourlens[i]; 2157 jj++; 2158 } 2159 2160 /* read in other processors and ship out */ 2161 for ( i=1; i<size; i++ ) { 2162 nz = procsnz[i]; 2163 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2164 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2165 } 2166 PetscFree(procsnz); 2167 } else { 2168 /* receive numeric values */ 2169 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2170 2171 /* receive message of values*/ 2172 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2173 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2174 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2175 2176 /* insert into matrix */ 2177 jj = rstart; 2178 smycols = mycols; 2179 svals = vals; 2180 for ( i=0; i<m; i++ ) { 2181 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2182 smycols += ourlens[i]; 2183 svals += ourlens[i]; 2184 jj++; 2185 } 2186 } 2187 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2188 2189 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2190 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNC__ 2195 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2196 /* 2197 Not great since it makes two copies of the submatrix, first an SeqAIJ 2198 in local and then by concatenating the local matrices the end result. 2199 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2200 */ 2201 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2202 { 2203 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2204 Mat *local,M, Mreuse; 2205 Scalar *vwork,*aa; 2206 MPI_Comm comm = mat->comm; 2207 Mat_SeqAIJ *aij; 2208 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2209 2210 PetscFunctionBegin; 2211 MPI_Comm_rank(comm,&rank); 2212 MPI_Comm_size(comm,&size); 2213 2214 if (call == MAT_REUSE_MATRIX) { 2215 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2216 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2217 local = &Mreuse; 2218 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2219 } else { 2220 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2221 Mreuse = *local; 2222 PetscFree(local); 2223 } 2224 2225 /* 2226 m - number of local rows 2227 n - number of columns (same on all processors) 2228 rstart - first row in new global matrix generated 2229 */ 2230 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2231 if (call == MAT_INITIAL_MATRIX) { 2232 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2233 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2234 ii = aij->i; 2235 jj = aij->j; 2236 2237 /* 2238 Determine the number of non-zeros in the diagonal and off-diagonal 2239 portions of the matrix in order to do correct preallocation 2240 */ 2241 2242 /* first get start and end of "diagonal" columns */ 2243 if (csize == PETSC_DECIDE) { 2244 nlocal = n/size + ((n % size) > rank); 2245 } else { 2246 nlocal = csize; 2247 } 2248 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2249 rstart = rend - nlocal; 2250 if (rank == size - 1 && rend != n) { 2251 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2252 } 2253 2254 /* next, compute all the lengths */ 2255 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2256 olens = dlens + m; 2257 for ( i=0; i<m; i++ ) { 2258 jend = ii[i+1] - ii[i]; 2259 olen = 0; 2260 dlen = 0; 2261 for ( j=0; j<jend; j++ ) { 2262 if ( *jj < rstart || *jj >= rend) olen++; 2263 else dlen++; 2264 jj++; 2265 } 2266 olens[i] = olen; 2267 dlens[i] = dlen; 2268 } 2269 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2270 PetscFree(dlens); 2271 } else { 2272 int ml,nl; 2273 2274 M = *newmat; 2275 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2276 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2277 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2278 /* 2279 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2280 rather than the slower MatSetValues(). 2281 */ 2282 M->was_assembled = PETSC_TRUE; 2283 M->assembled = PETSC_FALSE; 2284 } 2285 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2286 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2287 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2288 ii = aij->i; 2289 jj = aij->j; 2290 aa = aij->a; 2291 for (i=0; i<m; i++) { 2292 row = rstart + i; 2293 nz = ii[i+1] - ii[i]; 2294 cwork = jj; jj += nz; 2295 vwork = aa; aa += nz; 2296 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2297 } 2298 2299 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2300 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2301 *newmat = M; 2302 2303 /* save submatrix used in processor for next request */ 2304 if (call == MAT_INITIAL_MATRIX) { 2305 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2306 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2307 } 2308 2309 PetscFunctionReturn(0); 2310 } 2311