1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.280 1999/02/15 21:54:55 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 #if defined (USE_CTABLE) 22 int ierr; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined (USE_CTABLE) 27 ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr); 28 for ( i=0; i<n; i++ ){ 29 ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr); 30 } 31 #else 32 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 33 PLogObjectMemory(mat,aij->N*sizeof(int)); 34 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 35 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 36 #endif 37 PetscFunctionReturn(0); 38 } 39 40 extern int DisAssemble_MPIAIJ(Mat); 41 42 #define CHUNKSIZE 15 43 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 44 { \ 45 \ 46 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 47 rmax = aimax[row]; nrow = ailen[row]; \ 48 col1 = col - shift; \ 49 \ 50 low = 0; high = nrow; \ 51 while (high-low > 5) { \ 52 t = (low+high)/2; \ 53 if (rp[t] > col) high = t; \ 54 else low = t; \ 55 } \ 56 for ( _i=0; _i<nrow; _i++ ) { \ 57 if (rp[_i] > col1) break; \ 58 if (rp[_i] == col1) { \ 59 if (addv == ADD_VALUES) ap[_i] += value; \ 60 else ap[_i] = value; \ 61 goto a_noinsert; \ 62 } \ 63 } \ 64 if (nonew == 1) goto a_noinsert; \ 65 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 66 if (nrow >= rmax) { \ 67 /* there is no extra room in row, therefore enlarge */ \ 68 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 69 Scalar *new_a; \ 70 \ 71 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 72 \ 73 /* malloc new storage space */ \ 74 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 75 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 76 new_j = (int *) (new_a + new_nz); \ 77 new_i = new_j + new_nz; \ 78 \ 79 /* copy over old data into new slots */ \ 80 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 81 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 82 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 83 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 84 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 85 len*sizeof(int)); \ 86 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 87 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 88 len*sizeof(Scalar)); \ 89 /* free up old matrix storage */ \ 90 \ 91 PetscFree(a->a); \ 92 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 93 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 94 a->singlemalloc = 1; \ 95 \ 96 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 97 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 98 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 99 a->maxnz += CHUNKSIZE; \ 100 a->reallocs++; \ 101 } \ 102 N = nrow++ - 1; a->nz++; \ 103 /* shift up all the later entries in this row */ \ 104 for ( ii=N; ii>=_i; ii-- ) { \ 105 rp[ii+1] = rp[ii]; \ 106 ap[ii+1] = ap[ii]; \ 107 } \ 108 rp[_i] = col1; \ 109 ap[_i] = value; \ 110 a_noinsert: ; \ 111 ailen[row] = nrow; \ 112 } 113 114 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 115 { \ 116 \ 117 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 118 rmax = bimax[row]; nrow = bilen[row]; \ 119 col1 = col - shift; \ 120 \ 121 low = 0; high = nrow; \ 122 while (high-low > 5) { \ 123 t = (low+high)/2; \ 124 if (rp[t] > col) high = t; \ 125 else low = t; \ 126 } \ 127 for ( _i=0; _i<nrow; _i++ ) { \ 128 if (rp[_i] > col1) break; \ 129 if (rp[_i] == col1) { \ 130 if (addv == ADD_VALUES) ap[_i] += value; \ 131 else ap[_i] = value; \ 132 goto b_noinsert; \ 133 } \ 134 } \ 135 if (nonew == 1) goto b_noinsert; \ 136 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 137 if (nrow >= rmax) { \ 138 /* there is no extra room in row, therefore enlarge */ \ 139 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 140 Scalar *new_a; \ 141 \ 142 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 143 \ 144 /* malloc new storage space */ \ 145 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 146 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 147 new_j = (int *) (new_a + new_nz); \ 148 new_i = new_j + new_nz; \ 149 \ 150 /* copy over old data into new slots */ \ 151 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 152 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 153 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 154 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 155 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 156 len*sizeof(int)); \ 157 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 158 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 159 len*sizeof(Scalar)); \ 160 /* free up old matrix storage */ \ 161 \ 162 PetscFree(b->a); \ 163 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 164 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 165 b->singlemalloc = 1; \ 166 \ 167 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 168 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 169 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 170 b->maxnz += CHUNKSIZE; \ 171 b->reallocs++; \ 172 } \ 173 N = nrow++ - 1; b->nz++; \ 174 /* shift up all the later entries in this row */ \ 175 for ( ii=N; ii>=_i; ii-- ) { \ 176 rp[ii+1] = rp[ii]; \ 177 ap[ii+1] = ap[ii]; \ 178 } \ 179 rp[_i] = col1; \ 180 ap[_i] = value; \ 181 b_noinsert: ; \ 182 bilen[row] = nrow; \ 183 } 184 185 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 186 #undef __FUNC__ 187 #define __FUNC__ "MatSetValues_MPIAIJ" 188 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 191 Scalar value; 192 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 193 int cstart = aij->cstart, cend = aij->cend,row,col; 194 int roworiented = aij->roworiented; 195 196 /* Some Variables required in the macro */ 197 Mat A = aij->A; 198 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 199 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 200 Scalar *aa = a->a; 201 202 Mat B = aij->B; 203 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 204 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 205 Scalar *ba = b->a; 206 207 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 208 int nonew = a->nonew,shift = a->indexshift; 209 Scalar *ap; 210 211 PetscFunctionBegin; 212 for ( i=0; i<m; i++ ) { 213 if (im[i] < 0) continue; 214 #if defined(USE_PETSC_BOPT_g) 215 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 216 #endif 217 if (im[i] >= rstart && im[i] < rend) { 218 row = im[i] - rstart; 219 for ( j=0; j<n; j++ ) { 220 if (in[j] >= cstart && in[j] < cend){ 221 col = in[j] - cstart; 222 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } 226 else if (in[j] < 0) continue; 227 #if defined(USE_PETSC_BOPT_g) 228 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 229 #endif 230 else { 231 if (mat->was_assembled) { 232 if (!aij->colmap) { 233 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 234 } 235 #if defined (USE_CTABLE) 236 col = TableFind( aij->colmap, in[j] + 1 ) - 1; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ *) B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (roworiented && !aij->donotstash) { 257 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 258 } else { 259 if (!aij->donotstash) { 260 row = im[i]; 261 for ( j=0; j<n; j++ ) { 262 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNC__ 272 #define __FUNC__ "MatGetValues_MPIAIJ" 273 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 276 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 277 int cstart = aij->cstart, cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for ( i=0; i<m; i++ ) { 281 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 282 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for ( j=0; j<n; j++ ) { 286 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 287 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (USE_CTABLE) 296 col = TableFind( aij->colmap, idxn[j] + 1 ) - 1; 297 #else 298 col = aij->colmap[idxn[j]] - 1; 299 #endif 300 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 301 else { 302 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 303 } 304 } 305 } 306 } else { 307 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNC__ 314 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 316 { 317 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 318 MPI_Comm comm = mat->comm; 319 int size = aij->size, *owners = aij->rowners; 320 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 321 MPI_Request *send_waits,*recv_waits; 322 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 323 InsertMode addv; 324 Scalar *rvalues,*svalues; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 aij->svalues = 0; aij->rvalues = 0; 329 aij->nsends = 0; aij->nrecvs = 0; 330 aij->send_waits = 0; aij->recv_waits = 0; 331 aij->rmax = 0; 332 PetscFunctionReturn(0); 333 } 334 335 /* make sure all processors are either in INSERTMODE or ADDMODE */ 336 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 337 if (addv == (ADD_VALUES|INSERT_VALUES)) { 338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 339 } 340 mat->insertmode = addv; /* in case this processor had no cache */ 341 342 /* first count number of contributors to each processor */ 343 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 344 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 345 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 346 for ( i=0; i<aij->stash.n; i++ ) { 347 idx = aij->stash.idx[i]; 348 for ( j=0; j<size; j++ ) { 349 if (idx >= owners[j] && idx < owners[j+1]) { 350 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 351 } 352 } 353 } 354 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 355 356 /* inform other processors of number of messages and max length*/ 357 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 358 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 359 nreceives = work[rank]; 360 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 361 nmax = work[rank]; 362 PetscFree(work); 363 364 /* post receives: 365 1) each message will consist of ordered pairs 366 (global index,value) we store the global index as a double 367 to simplify the message passing. 368 2) since we don't know how long each individual message is we 369 allocate the largest needed buffer for each receive. Potentially 370 this is a lot of wasted space. 371 372 373 This could be done better. 374 */ 375 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 376 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 377 for ( i=0; i<nreceives; i++ ) { 378 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 379 comm,recv_waits+i);CHKERRQ(ierr); 380 } 381 382 /* do sends: 383 1) starts[i] gives the starting index in svalues for stuff going to 384 the ith processor 385 */ 386 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 387 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 388 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 389 starts[0] = 0; 390 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 391 for ( i=0; i<aij->stash.n; i++ ) { 392 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 393 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 394 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 395 } 396 PetscFree(owner); 397 starts[0] = 0; 398 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 399 count = 0; 400 for ( i=0; i<size; i++ ) { 401 if (procs[i]) { 402 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 403 } 404 } 405 PetscFree(starts); 406 PetscFree(nprocs); 407 408 /* Free cache space */ 409 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 410 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 411 412 aij->svalues = svalues; aij->rvalues = rvalues; 413 aij->nsends = nsends; aij->nrecvs = nreceives; 414 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 415 aij->rmax = nmax; 416 417 PetscFunctionReturn(0); 418 } 419 extern int MatSetUpMultiply_MPIAIJ(Mat); 420 421 #undef __FUNC__ 422 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 423 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 424 { 425 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 426 MPI_Status *send_status,recv_status; 427 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 428 int row,col,other_disassembled; 429 Scalar *values,val; 430 InsertMode addv = mat->insertmode; 431 432 PetscFunctionBegin; 433 434 /* wait on receives */ 435 while (count) { 436 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 437 /* unpack receives into our local space */ 438 values = aij->rvalues + 3*imdex*aij->rmax; 439 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 440 n = n/3; 441 for ( i=0; i<n; i++ ) { 442 row = (int) PetscReal(values[3*i]) - aij->rstart; 443 col = (int) PetscReal(values[3*i+1]); 444 val = values[3*i+2]; 445 if (col >= aij->cstart && col < aij->cend) { 446 col -= aij->cstart; 447 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 448 } else { 449 if (mat->was_assembled || mat->assembled) { 450 if (!aij->colmap) { 451 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 452 } 453 #if defined (USE_CTABLE) 454 col = TableFind( aij->colmap, col + 1 ) - 1; 455 #else 456 col = aij->colmap[col] - 1; 457 #endif 458 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 col = (int) PetscReal(values[3*i+1]); 461 } 462 } 463 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 464 } 465 } 466 count--; 467 } 468 if (aij->recv_waits) PetscFree(aij->recv_waits); 469 if (aij->rvalues) PetscFree(aij->rvalues); 470 471 /* wait on sends */ 472 if (aij->nsends) { 473 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 474 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 475 PetscFree(send_status); 476 } 477 if (aij->send_waits) PetscFree(aij->send_waits); 478 if (aij->svalues) PetscFree(aij->svalues); 479 480 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 481 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 482 483 /* determine if any processor has disassembled, if so we must 484 also disassemble ourselfs, in order that we may reassemble. */ 485 /* 486 if nonzero structure of submatrix B cannot change then we know that 487 no processor disassembled thus we can skip this stuff 488 */ 489 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 490 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 491 if (mat->was_assembled && !other_disassembled) { 492 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 493 } 494 } 495 496 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 497 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 498 } 499 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 500 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 501 502 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNC__ 507 #define __FUNC__ "MatZeroEntries_MPIAIJ" 508 int MatZeroEntries_MPIAIJ(Mat A) 509 { 510 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 511 int ierr; 512 513 PetscFunctionBegin; 514 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 515 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNC__ 520 #define __FUNC__ "MatZeroRows_MPIAIJ" 521 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 522 { 523 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 524 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 525 int *procs,*nprocs,j,found,idx,nsends,*work,row; 526 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 527 int *rvalues,tag = A->tag,count,base,slen,n,*source; 528 int *lens,imdex,*lrows,*values,rstart=l->rstart; 529 MPI_Comm comm = A->comm; 530 MPI_Request *send_waits,*recv_waits; 531 MPI_Status recv_status,*send_status; 532 IS istmp; 533 534 PetscFunctionBegin; 535 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 536 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 537 538 /* first count number of contributors to each processor */ 539 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 540 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 541 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 542 for ( i=0; i<N; i++ ) { 543 idx = rows[i]; 544 found = 0; 545 for ( j=0; j<size; j++ ) { 546 if (idx >= owners[j] && idx < owners[j+1]) { 547 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 548 } 549 } 550 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 551 } 552 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 553 554 /* inform other processors of number of messages and max length*/ 555 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 556 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 557 nrecvs = work[rank]; 558 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 559 nmax = work[rank]; 560 PetscFree(work); 561 562 /* post receives: */ 563 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 564 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 565 for ( i=0; i<nrecvs; i++ ) { 566 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 567 } 568 569 /* do sends: 570 1) starts[i] gives the starting index in svalues for stuff going to 571 the ith processor 572 */ 573 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 574 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 575 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 576 starts[0] = 0; 577 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 578 for ( i=0; i<N; i++ ) { 579 svalues[starts[owner[i]]++] = rows[i]; 580 } 581 ISRestoreIndices(is,&rows); 582 583 starts[0] = 0; 584 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 585 count = 0; 586 for ( i=0; i<size; i++ ) { 587 if (procs[i]) { 588 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 589 } 590 } 591 PetscFree(starts); 592 593 base = owners[rank]; 594 595 /* wait on receives */ 596 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 597 source = lens + nrecvs; 598 count = nrecvs; slen = 0; 599 while (count) { 600 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 601 /* unpack receives into our local space */ 602 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 603 source[imdex] = recv_status.MPI_SOURCE; 604 lens[imdex] = n; 605 slen += n; 606 count--; 607 } 608 PetscFree(recv_waits); 609 610 /* move the data into the send scatter */ 611 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 612 count = 0; 613 for ( i=0; i<nrecvs; i++ ) { 614 values = rvalues + i*nmax; 615 for ( j=0; j<lens[i]; j++ ) { 616 lrows[count++] = values[j] - base; 617 } 618 } 619 PetscFree(rvalues); PetscFree(lens); 620 PetscFree(owner); PetscFree(nprocs); 621 622 /* actually zap the local rows */ 623 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 624 PLogObjectParent(A,istmp); 625 626 /* 627 Zero the required rows. If the "diagonal block" of the matrix 628 is square and the user wishes to set the diagonal we use seperate 629 code so that MatSetValues() is not called for each diagonal allocating 630 new memory, thus calling lots of mallocs and slowing things down. 631 632 Contributed by: Mathew Knepley 633 */ 634 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 635 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 636 if (diag && (l->A->M == l->A->N)) { 637 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 638 } else if (diag) { 639 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 640 for ( i = 0; i < slen; i++ ) { 641 row = lrows[i] + rstart; 642 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 643 } 644 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 645 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 646 } else { 647 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 648 } 649 ierr = ISDestroy(istmp); CHKERRQ(ierr); 650 PetscFree(lrows); 651 652 /* wait on sends */ 653 if (nsends) { 654 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 655 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 656 PetscFree(send_status); 657 } 658 PetscFree(send_waits); PetscFree(svalues); 659 660 PetscFunctionReturn(0); 661 } 662 663 #undef __FUNC__ 664 #define __FUNC__ "MatMult_MPIAIJ" 665 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 666 { 667 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 668 int ierr,nt; 669 670 PetscFunctionBegin; 671 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 672 if (nt != a->n) { 673 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 674 } 675 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 676 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 677 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 678 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNC__ 683 #define __FUNC__ "MatMultAdd_MPIAIJ" 684 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 687 int ierr; 688 689 PetscFunctionBegin; 690 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 691 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 692 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 693 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNC__ 698 #define __FUNC__ "MatMultTrans_MPIAIJ" 699 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 700 { 701 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 702 int ierr; 703 704 PetscFunctionBegin; 705 /* do nondiagonal part */ 706 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 707 /* send it on its way */ 708 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 709 /* do local part */ 710 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 711 /* receive remote parts: note this assumes the values are not actually */ 712 /* inserted in yy until the next line, which is true for my implementation*/ 713 /* but is not perhaps always true. */ 714 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNC__ 719 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 720 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 721 { 722 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 723 int ierr; 724 725 PetscFunctionBegin; 726 /* do nondiagonal part */ 727 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 728 /* send it on its way */ 729 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 730 /* do local part */ 731 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 732 /* receive remote parts: note this assumes the values are not actually */ 733 /* inserted in yy until the next line, which is true for my implementation*/ 734 /* but is not perhaps always true. */ 735 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 /* 740 This only works correctly for square matrices where the subblock A->A is the 741 diagonal block 742 */ 743 #undef __FUNC__ 744 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 745 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 746 { 747 int ierr; 748 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 749 750 PetscFunctionBegin; 751 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 752 if (a->rstart != a->cstart || a->rend != a->cend) { 753 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 754 } 755 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNC__ 760 #define __FUNC__ "MatScale_MPIAIJ" 761 int MatScale_MPIAIJ(Scalar *aa,Mat A) 762 { 763 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 764 int ierr; 765 766 PetscFunctionBegin; 767 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 768 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNC__ 773 #define __FUNC__ "MatDestroy_MPIAIJ" 774 int MatDestroy_MPIAIJ(Mat mat) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 777 int ierr; 778 779 PetscFunctionBegin; 780 if (--mat->refct > 0) PetscFunctionReturn(0); 781 782 if (mat->mapping) { 783 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 784 } 785 if (mat->bmapping) { 786 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 787 } 788 if (mat->rmap) { 789 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 790 } 791 if (mat->cmap) { 792 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 793 } 794 #if defined(USE_PETSC_LOG) 795 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 796 #endif 797 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 798 PetscFree(aij->rowners); 799 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 800 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 801 #if defined (USE_CTABLE) 802 if (aij->colmap) TableDelete(aij->colmap); 803 #else 804 if (aij->colmap) PetscFree(aij->colmap); 805 #endif 806 if (aij->garray) PetscFree(aij->garray); 807 if (aij->lvec) VecDestroy(aij->lvec); 808 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 809 if (aij->rowvalues) PetscFree(aij->rowvalues); 810 PetscFree(aij); 811 PLogObjectDestroy(mat); 812 PetscHeaderDestroy(mat); 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNC__ 817 #define __FUNC__ "MatView_MPIAIJ_Binary" 818 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 819 { 820 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 821 int ierr; 822 823 PetscFunctionBegin; 824 if (aij->size == 1) { 825 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 826 } 827 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 828 PetscFunctionReturn(0); 829 } 830 831 #undef __FUNC__ 832 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 833 extern int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 834 { 835 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 836 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 837 int ierr, format,shift = C->indexshift,rank = aij->rank ; 838 int size = aij->size; 839 FILE *fd; 840 ViewerType vtype; 841 842 PetscFunctionBegin; 843 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 844 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 845 ierr = ViewerGetFormat(viewer,&format); 846 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 847 MatInfo info; 848 int flg; 849 MPI_Comm_rank(mat->comm,&rank); 850 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 851 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 852 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 853 PetscSequentialPhaseBegin(mat->comm,1); 854 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 855 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 856 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 857 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 858 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 859 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 860 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 861 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 862 fflush(fd); 863 PetscSequentialPhaseEnd(mat->comm,1); 864 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 867 PetscFunctionReturn(0); 868 } 869 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 870 Draw draw; 871 PetscTruth isnull; 872 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 873 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 874 } 875 876 if (size == 1) { 877 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 878 } else { 879 /* assemble the entire matrix onto first processor. */ 880 Mat A; 881 Mat_SeqAIJ *Aloc; 882 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 883 Scalar *a; 884 885 if (!rank) { 886 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 887 } else { 888 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 889 } 890 PLogObjectParent(mat,A); 891 892 /* copy over the A part */ 893 Aloc = (Mat_SeqAIJ*) aij->A->data; 894 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 895 row = aij->rstart; 896 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 897 for ( i=0; i<m; i++ ) { 898 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 899 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 900 } 901 aj = Aloc->j; 902 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 903 904 /* copy over the B part */ 905 Aloc = (Mat_SeqAIJ*) aij->B->data; 906 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 907 row = aij->rstart; 908 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 909 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 910 for ( i=0; i<m; i++ ) { 911 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 912 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 913 } 914 PetscFree(ct); 915 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 916 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 917 /* 918 Everyone has to call to draw the matrix since the graphics waits are 919 synchronized across all processors that share the Draw object 920 */ 921 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 922 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 923 } 924 ierr = MatDestroy(A); CHKERRQ(ierr); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNC__ 930 #define __FUNC__ "MatView_MPIAIJ" 931 int MatView_MPIAIJ(Mat mat,Viewer viewer) 932 { 933 int ierr; 934 ViewerType vtype; 935 936 PetscFunctionBegin; 937 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 938 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 939 PetscTypeCompare(vtype,SOCKET_VIEWER)) { 940 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr); 941 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 942 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 943 } else { 944 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 945 } 946 PetscFunctionReturn(0); 947 } 948 949 /* 950 This has to provide several versions. 951 952 2) a) use only local smoothing updating outer values only once. 953 b) local smoothing updating outer values each inner iteration 954 3) color updating out values betwen colors. 955 */ 956 #undef __FUNC__ 957 #define __FUNC__ "MatRelax_MPIAIJ" 958 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 959 double fshift,int its,Vec xx) 960 { 961 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 962 Mat AA = mat->A, BB = mat->B; 963 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 964 Scalar *b,*x,*xs,*ls,d,*v,sum; 965 int ierr,*idx, *diag; 966 int n = mat->n, m = mat->m, i,shift = A->indexshift; 967 968 PetscFunctionBegin; 969 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 970 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 971 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 972 xs = x + shift; /* shift by one for index start of 1 */ 973 ls = ls + shift; 974 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 975 diag = A->diag; 976 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 977 if (flag & SOR_ZERO_INITIAL_GUESS) { 978 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 979 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 980 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 981 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 985 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 986 while (its--) { 987 /* go down through the rows */ 988 for ( i=0; i<m; i++ ) { 989 n = A->i[i+1] - A->i[i]; 990 PLogFlops(4*n+3); 991 idx = A->j + A->i[i] + shift; 992 v = A->a + A->i[i] + shift; 993 sum = b[i]; 994 SPARSEDENSEMDOT(sum,xs,v,idx,n); 995 d = fshift + A->a[diag[i]+shift]; 996 n = B->i[i+1] - B->i[i]; 997 idx = B->j + B->i[i] + shift; 998 v = B->a + B->i[i] + shift; 999 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1000 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1001 } 1002 /* come up through the rows */ 1003 for ( i=m-1; i>-1; i-- ) { 1004 n = A->i[i+1] - A->i[i]; 1005 PLogFlops(4*n+3) 1006 idx = A->j + A->i[i] + shift; 1007 v = A->a + A->i[i] + shift; 1008 sum = b[i]; 1009 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1010 d = fshift + A->a[diag[i]+shift]; 1011 n = B->i[i+1] - B->i[i]; 1012 idx = B->j + B->i[i] + shift; 1013 v = B->a + B->i[i] + shift; 1014 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1015 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1016 } 1017 } 1018 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1019 if (flag & SOR_ZERO_INITIAL_GUESS) { 1020 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1021 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1022 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1023 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1028 while (its--) { 1029 for ( i=0; i<m; i++ ) { 1030 n = A->i[i+1] - A->i[i]; 1031 PLogFlops(4*n+3); 1032 idx = A->j + A->i[i] + shift; 1033 v = A->a + A->i[i] + shift; 1034 sum = b[i]; 1035 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1036 d = fshift + A->a[diag[i]+shift]; 1037 n = B->i[i+1] - B->i[i]; 1038 idx = B->j + B->i[i] + shift; 1039 v = B->a + B->i[i] + shift; 1040 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1041 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1042 } 1043 } 1044 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1045 if (flag & SOR_ZERO_INITIAL_GUESS) { 1046 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1047 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1048 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1049 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1050 PetscFunctionReturn(0); 1051 } 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 while (its--) { 1055 for ( i=m-1; i>-1; i-- ) { 1056 n = A->i[i+1] - A->i[i]; 1057 PLogFlops(4*n+3); 1058 idx = A->j + A->i[i] + shift; 1059 v = A->a + A->i[i] + shift; 1060 sum = b[i]; 1061 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1062 d = fshift + A->a[diag[i]+shift]; 1063 n = B->i[i+1] - B->i[i]; 1064 idx = B->j + B->i[i] + shift; 1065 v = B->a + B->i[i] + shift; 1066 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1067 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1068 } 1069 } 1070 } else { 1071 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1072 } 1073 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1075 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNC__ 1080 #define __FUNC__ "MatGetInfo_MPIAIJ" 1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1084 Mat A = mat->A, B = mat->B; 1085 int ierr; 1086 double isend[5], irecv[5]; 1087 1088 PetscFunctionBegin; 1089 info->block_size = 1.0; 1090 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1091 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1092 isend[3] = info->memory; isend[4] = info->mallocs; 1093 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1094 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1095 isend[3] += info->memory; isend[4] += info->mallocs; 1096 if (flag == MAT_LOCAL) { 1097 info->nz_used = isend[0]; 1098 info->nz_allocated = isend[1]; 1099 info->nz_unneeded = isend[2]; 1100 info->memory = isend[3]; 1101 info->mallocs = isend[4]; 1102 } else if (flag == MAT_GLOBAL_MAX) { 1103 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1104 info->nz_used = irecv[0]; 1105 info->nz_allocated = irecv[1]; 1106 info->nz_unneeded = irecv[2]; 1107 info->memory = irecv[3]; 1108 info->mallocs = irecv[4]; 1109 } else if (flag == MAT_GLOBAL_SUM) { 1110 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1111 info->nz_used = irecv[0]; 1112 info->nz_allocated = irecv[1]; 1113 info->nz_unneeded = irecv[2]; 1114 info->memory = irecv[3]; 1115 info->mallocs = irecv[4]; 1116 } 1117 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1118 info->fill_ratio_needed = 0; 1119 info->factor_mallocs = 0; 1120 info->rows_global = (double)mat->M; 1121 info->columns_global = (double)mat->N; 1122 info->rows_local = (double)mat->m; 1123 info->columns_local = (double)mat->N; 1124 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNC__ 1129 #define __FUNC__ "MatSetOption_MPIAIJ" 1130 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1133 int ierr; 1134 1135 PetscFunctionBegin; 1136 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1137 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1138 op == MAT_COLUMNS_UNSORTED || 1139 op == MAT_COLUMNS_SORTED || 1140 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1141 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1142 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1143 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1144 } else if (op == MAT_ROW_ORIENTED) { 1145 a->roworiented = 1; 1146 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1147 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1148 } else if (op == MAT_ROWS_SORTED || 1149 op == MAT_ROWS_UNSORTED || 1150 op == MAT_SYMMETRIC || 1151 op == MAT_STRUCTURALLY_SYMMETRIC || 1152 op == MAT_YES_NEW_DIAGONALS) 1153 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1154 else if (op == MAT_COLUMN_ORIENTED) { 1155 a->roworiented = 0; 1156 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1157 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1158 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1159 a->donotstash = 1; 1160 } else if (op == MAT_NO_NEW_DIAGONALS){ 1161 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1162 } else { 1163 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1164 } 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNC__ 1169 #define __FUNC__ "MatGetSize_MPIAIJ" 1170 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1171 { 1172 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1173 1174 PetscFunctionBegin; 1175 if (m) *m = mat->M; 1176 if (n) *n = mat->N; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNC__ 1181 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1182 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1183 { 1184 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1185 1186 PetscFunctionBegin; 1187 if (m) *m = mat->m; 1188 if (n) *n = mat->n; 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNC__ 1193 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1194 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1195 { 1196 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1197 1198 PetscFunctionBegin; 1199 *m = mat->rstart; *n = mat->rend; 1200 PetscFunctionReturn(0); 1201 } 1202 1203 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1204 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1205 1206 #undef __FUNC__ 1207 #define __FUNC__ "MatGetRow_MPIAIJ" 1208 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1209 { 1210 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1211 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1212 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1213 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1214 int *cmap, *idx_p; 1215 1216 PetscFunctionBegin; 1217 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1218 mat->getrowactive = PETSC_TRUE; 1219 1220 if (!mat->rowvalues && (idx || v)) { 1221 /* 1222 allocate enough space to hold information from the longest row. 1223 */ 1224 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1225 int max = 1,m = mat->m,tmp; 1226 for ( i=0; i<m; i++ ) { 1227 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1228 if (max < tmp) { max = tmp; } 1229 } 1230 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1231 mat->rowindices = (int *) (mat->rowvalues + max); 1232 } 1233 1234 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1235 lrow = row - rstart; 1236 1237 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1238 if (!v) {pvA = 0; pvB = 0;} 1239 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1240 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1241 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1242 nztot = nzA + nzB; 1243 1244 cmap = mat->garray; 1245 if (v || idx) { 1246 if (nztot) { 1247 /* Sort by increasing column numbers, assuming A and B already sorted */ 1248 int imark = -1; 1249 if (v) { 1250 *v = v_p = mat->rowvalues; 1251 for ( i=0; i<nzB; i++ ) { 1252 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1253 else break; 1254 } 1255 imark = i; 1256 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1257 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1258 } 1259 if (idx) { 1260 *idx = idx_p = mat->rowindices; 1261 if (imark > -1) { 1262 for ( i=0; i<imark; i++ ) { 1263 idx_p[i] = cmap[cworkB[i]]; 1264 } 1265 } else { 1266 for ( i=0; i<nzB; i++ ) { 1267 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1268 else break; 1269 } 1270 imark = i; 1271 } 1272 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1273 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1274 } 1275 } else { 1276 if (idx) *idx = 0; 1277 if (v) *v = 0; 1278 } 1279 } 1280 *nz = nztot; 1281 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1282 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNC__ 1287 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1288 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1289 { 1290 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1291 1292 PetscFunctionBegin; 1293 if (aij->getrowactive == PETSC_FALSE) { 1294 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1295 } 1296 aij->getrowactive = PETSC_FALSE; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNC__ 1301 #define __FUNC__ "MatNorm_MPIAIJ" 1302 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1303 { 1304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1305 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1306 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1307 double sum = 0.0; 1308 Scalar *v; 1309 1310 PetscFunctionBegin; 1311 if (aij->size == 1) { 1312 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1313 } else { 1314 if (type == NORM_FROBENIUS) { 1315 v = amat->a; 1316 for (i=0; i<amat->nz; i++ ) { 1317 #if defined(USE_PETSC_COMPLEX) 1318 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1319 #else 1320 sum += (*v)*(*v); v++; 1321 #endif 1322 } 1323 v = bmat->a; 1324 for (i=0; i<bmat->nz; i++ ) { 1325 #if defined(USE_PETSC_COMPLEX) 1326 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1327 #else 1328 sum += (*v)*(*v); v++; 1329 #endif 1330 } 1331 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1332 *norm = sqrt(*norm); 1333 } else if (type == NORM_1) { /* max column norm */ 1334 double *tmp, *tmp2; 1335 int *jj, *garray = aij->garray; 1336 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1337 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1338 PetscMemzero(tmp,aij->N*sizeof(double)); 1339 *norm = 0.0; 1340 v = amat->a; jj = amat->j; 1341 for ( j=0; j<amat->nz; j++ ) { 1342 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1343 } 1344 v = bmat->a; jj = bmat->j; 1345 for ( j=0; j<bmat->nz; j++ ) { 1346 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1347 } 1348 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1349 for ( j=0; j<aij->N; j++ ) { 1350 if (tmp2[j] > *norm) *norm = tmp2[j]; 1351 } 1352 PetscFree(tmp); PetscFree(tmp2); 1353 } else if (type == NORM_INFINITY) { /* max row norm */ 1354 double ntemp = 0.0; 1355 for ( j=0; j<amat->m; j++ ) { 1356 v = amat->a + amat->i[j] + shift; 1357 sum = 0.0; 1358 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1359 sum += PetscAbsScalar(*v); v++; 1360 } 1361 v = bmat->a + bmat->i[j] + shift; 1362 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1363 sum += PetscAbsScalar(*v); v++; 1364 } 1365 if (sum > ntemp) ntemp = sum; 1366 } 1367 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1368 } else { 1369 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1370 } 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNC__ 1376 #define __FUNC__ "MatTranspose_MPIAIJ" 1377 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1378 { 1379 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1380 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1381 int ierr,shift = Aloc->indexshift; 1382 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1383 Mat B; 1384 Scalar *array; 1385 1386 PetscFunctionBegin; 1387 if (matout == PETSC_NULL && M != N) { 1388 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1389 } 1390 1391 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1392 1393 /* copy over the A part */ 1394 Aloc = (Mat_SeqAIJ*) a->A->data; 1395 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1396 row = a->rstart; 1397 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1398 for ( i=0; i<m; i++ ) { 1399 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1401 } 1402 aj = Aloc->j; 1403 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1404 1405 /* copy over the B part */ 1406 Aloc = (Mat_SeqAIJ*) a->B->data; 1407 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1408 row = a->rstart; 1409 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1410 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1411 for ( i=0; i<m; i++ ) { 1412 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1413 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1414 } 1415 PetscFree(ct); 1416 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1417 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1418 if (matout != PETSC_NULL) { 1419 *matout = B; 1420 } else { 1421 PetscOps *Abops; 1422 struct _MatOps *Aops; 1423 1424 /* This isn't really an in-place transpose .... but free data structures from a */ 1425 PetscFree(a->rowners); 1426 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1427 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1428 #if defined (USE_CTABLE) 1429 if (a->colmap) TableDelete(a->colmap); 1430 #else 1431 if (a->colmap) PetscFree(a->colmap); 1432 #endif 1433 if (a->garray) PetscFree(a->garray); 1434 if (a->lvec) VecDestroy(a->lvec); 1435 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1436 PetscFree(a); 1437 1438 /* 1439 This is horrible, horrible code. We need to keep the 1440 A pointers for the bops and ops but copy everything 1441 else from C. 1442 */ 1443 Abops = A->bops; 1444 Aops = A->ops; 1445 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1446 A->bops = Abops; 1447 A->ops = Aops; 1448 PetscHeaderDestroy(B); 1449 } 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNC__ 1454 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1455 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1456 { 1457 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1458 Mat a = aij->A, b = aij->B; 1459 int ierr,s1,s2,s3; 1460 1461 PetscFunctionBegin; 1462 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1463 if (rr) { 1464 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1465 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1466 /* Overlap communication with computation. */ 1467 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1468 } 1469 if (ll) { 1470 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1471 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1472 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1473 } 1474 /* scale the diagonal block */ 1475 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1476 1477 if (rr) { 1478 /* Do a scatter end and then right scale the off-diagonal block */ 1479 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1480 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1481 } 1482 1483 PetscFunctionReturn(0); 1484 } 1485 1486 1487 extern int MatPrintHelp_SeqAIJ(Mat); 1488 #undef __FUNC__ 1489 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1490 int MatPrintHelp_MPIAIJ(Mat A) 1491 { 1492 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1493 int ierr; 1494 1495 PetscFunctionBegin; 1496 if (!a->rank) { 1497 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNC__ 1503 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1504 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1505 { 1506 PetscFunctionBegin; 1507 *bs = 1; 1508 PetscFunctionReturn(0); 1509 } 1510 #undef __FUNC__ 1511 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1512 int MatSetUnfactored_MPIAIJ(Mat A) 1513 { 1514 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1515 int ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNC__ 1523 #define __FUNC__ "MatEqual_MPIAIJ" 1524 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1525 { 1526 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1527 Mat a, b, c, d; 1528 PetscTruth flg; 1529 int ierr; 1530 1531 PetscFunctionBegin; 1532 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1533 a = matA->A; b = matA->B; 1534 c = matB->A; d = matB->B; 1535 1536 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1537 if (flg == PETSC_TRUE) { 1538 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1539 } 1540 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNC__ 1545 #define __FUNC__ "MatCopy_MPIAIJ" 1546 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1547 { 1548 int ierr; 1549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1550 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1551 1552 PetscFunctionBegin; 1553 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1554 /* because of the column compression in the off-processor part of the matrix a->B, 1555 the number of columns in a->B and b->B may be different, hence we cannot call 1556 the MatCopy() directly on the two parts. If need be, we can provide a more 1557 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1558 then copying the submatrices */ 1559 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1562 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1568 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1569 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1570 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1571 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1572 1573 /* -------------------------------------------------------------------*/ 1574 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1575 MatGetRow_MPIAIJ, 1576 MatRestoreRow_MPIAIJ, 1577 MatMult_MPIAIJ, 1578 MatMultAdd_MPIAIJ, 1579 MatMultTrans_MPIAIJ, 1580 MatMultTransAdd_MPIAIJ, 1581 0, 1582 0, 1583 0, 1584 0, 1585 0, 1586 0, 1587 MatRelax_MPIAIJ, 1588 MatTranspose_MPIAIJ, 1589 MatGetInfo_MPIAIJ, 1590 MatEqual_MPIAIJ, 1591 MatGetDiagonal_MPIAIJ, 1592 MatDiagonalScale_MPIAIJ, 1593 MatNorm_MPIAIJ, 1594 MatAssemblyBegin_MPIAIJ, 1595 MatAssemblyEnd_MPIAIJ, 1596 0, 1597 MatSetOption_MPIAIJ, 1598 MatZeroEntries_MPIAIJ, 1599 MatZeroRows_MPIAIJ, 1600 0, 1601 0, 1602 0, 1603 0, 1604 MatGetSize_MPIAIJ, 1605 MatGetLocalSize_MPIAIJ, 1606 MatGetOwnershipRange_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 MatDuplicate_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 0, 1617 MatGetSubMatrices_MPIAIJ, 1618 MatIncreaseOverlap_MPIAIJ, 1619 MatGetValues_MPIAIJ, 1620 MatCopy_MPIAIJ, 1621 MatPrintHelp_MPIAIJ, 1622 MatScale_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 MatGetBlockSize_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 MatFDColoringCreate_MPIAIJ, 1632 0, 1633 MatSetUnfactored_MPIAIJ, 1634 0, 1635 0, 1636 MatGetSubMatrix_MPIAIJ, 1637 0, 1638 0, 1639 MatGetMaps_Petsc}; 1640 1641 /* ----------------------------------------------------------------------------------------*/ 1642 1643 EXTERN_C_BEGIN 1644 #undef __FUNC__ 1645 #define __FUNC__ "MatStoreValues_MPIAIJ" 1646 int MatStoreValues_MPIAIJ(Mat mat) 1647 { 1648 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1649 int ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1653 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 EXTERN_C_END 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNC__ 1660 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1661 int MatRetrieveValues_MPIAIJ(Mat mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 #include "pc.h" 1674 EXTERN_C_BEGIN 1675 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1676 EXTERN_C_END 1677 1678 #undef __FUNC__ 1679 #define __FUNC__ "MatCreateMPIAIJ" 1680 /*@C 1681 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1682 (the default parallel PETSc format). For good matrix assembly performance 1683 the user should preallocate the matrix storage by setting the parameters 1684 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1685 performance can be increased by more than a factor of 50. 1686 1687 Collective on MPI_Comm 1688 1689 Input Parameters: 1690 + comm - MPI communicator 1691 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1692 This value should be the same as the local size used in creating the 1693 y vector for the matrix-vector product y = Ax. 1694 . n - This value should be the same as the local size used in creating the 1695 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1696 calculated if N is given) For square matrices n is almost always m. 1697 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1698 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1699 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1700 (same value is used for all local rows) 1701 . d_nnz - array containing the number of nonzeros in the various rows of the 1702 DIAGONAL portion of the local submatrix (possibly different for each row) 1703 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1704 The size of this array is equal to the number of local rows, i.e 'm'. 1705 You must leave room for the diagonal entry even if it is zero. 1706 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1707 submatrix (same value is used for all local rows). 1708 - o_nnz - array containing the number of nonzeros in the various rows of the 1709 OFF-DIAGONAL portion of the local submatrix (possibly different for 1710 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1711 structure. The size of this array is equal to the number 1712 of local rows, i.e 'm'. 1713 1714 Output Parameter: 1715 . A - the matrix 1716 1717 Notes: 1718 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1719 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1720 storage requirements for this matrix. 1721 1722 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1723 processor than it must be used on all processors that share the object for 1724 that argument. 1725 1726 The AIJ format (also called the Yale sparse matrix format or 1727 compressed row storage), is fully compatible with standard Fortran 77 1728 storage. That is, the stored row and column indices can begin at 1729 either one (as in Fortran) or zero. See the users manual for details. 1730 1731 The user MUST specify either the local or global matrix dimensions 1732 (possibly both). 1733 1734 The parallel matrix is partitioned such that the first m0 rows belong to 1735 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1736 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1737 1738 The DIAGONAL portion of the local submatrix of a processor can be defined 1739 as the submatrix which is obtained by extraction the part corresponding 1740 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1741 first row that belongs to the processor, and r2 is the last row belonging 1742 to the this processor. This is a square mxm matrix. The remaining portion 1743 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1744 1745 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1746 1747 By default, this format uses inodes (identical nodes) when possible. 1748 We search for consecutive rows with the same nonzero structure, thereby 1749 reusing matrix information to achieve increased efficiency. 1750 1751 Options Database Keys: 1752 + -mat_aij_no_inode - Do not use inodes 1753 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1754 - -mat_aij_oneindex - Internally use indexing starting at 1 1755 rather than 0. Note that when calling MatSetValues(), 1756 the user still MUST index entries starting at 0! 1757 . -mat_mpi - use the parallel matrix data structures even on one processor 1758 (defaults to using SeqBAIJ format on one processor) 1759 . -mat_mpi - use the parallel matrix data structures even on one processor 1760 (defaults to using SeqAIJ format on one processor) 1761 1762 1763 Example usage: 1764 1765 Consider the following 8x8 matrix with 34 non-zero values, that is 1766 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1767 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1768 as follows: 1769 1770 .vb 1771 1 2 0 | 0 3 0 | 0 4 1772 Proc0 0 5 6 | 7 0 0 | 8 0 1773 9 0 10 | 11 0 0 | 12 0 1774 ------------------------------------- 1775 13 0 14 | 15 16 17 | 0 0 1776 Proc1 0 18 0 | 19 20 21 | 0 0 1777 0 0 0 | 22 23 0 | 24 0 1778 ------------------------------------- 1779 Proc2 25 26 27 | 0 0 28 | 29 0 1780 30 0 0 | 31 32 33 | 0 34 1781 .ve 1782 1783 This can be represented as a collection of submatrices as: 1784 1785 .vb 1786 A B C 1787 D E F 1788 G H I 1789 .ve 1790 1791 Where the submatrices A,B,C are owned by proc0, D,E,F are 1792 owned by proc1, G,H,I are owned by proc2. 1793 1794 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1795 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1796 The 'M','N' parameters are 8,8, and have the same values on all procs. 1797 1798 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1799 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1800 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1801 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1802 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1803 matrix, ans [DF] as another SeqAIJ matrix. 1804 1805 When d_nz, o_nz parameters are specified, d_nz storage elements are 1806 allocated for every row of the local diagonal submatrix, and o_nz 1807 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1808 One way to choose d_nz and o_nz is to use the max nonzerors per local 1809 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1810 In this case, the values of d_nz,o_nz are: 1811 .vb 1812 proc0 : dnz = 2, o_nz = 2 1813 proc1 : dnz = 3, o_nz = 2 1814 proc2 : dnz = 1, o_nz = 4 1815 .ve 1816 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1817 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1818 for proc3. i.e we are using 12+15+10=37 storage locations to store 1819 34 values. 1820 1821 When d_nnz, o_nnz parameters are specified, the storage is specified 1822 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1823 In the above case the values for d_nnz,o_nnz are: 1824 .vb 1825 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1826 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1827 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1828 .ve 1829 Here the space allocated is sum of all the above values i.e 34, and 1830 hence pre-allocation is perfect. 1831 1832 Level: intermediate 1833 1834 .keywords: matrix, aij, compressed row, sparse, parallel 1835 1836 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1837 @*/ 1838 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1839 { 1840 Mat B; 1841 Mat_MPIAIJ *b; 1842 int ierr, i,size,flag1 = 0, flag2 = 0; 1843 1844 PetscFunctionBegin; 1845 MPI_Comm_size(comm,&size); 1846 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1847 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1848 if (!flag1 && !flag2 && size == 1) { 1849 if (M == PETSC_DECIDE) M = m; 1850 if (N == PETSC_DECIDE) N = n; 1851 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1852 PetscFunctionReturn(0); 1853 } 1854 1855 *A = 0; 1856 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1857 PLogObjectCreate(B); 1858 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1859 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1860 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1861 B->ops->destroy = MatDestroy_MPIAIJ; 1862 B->ops->view = MatView_MPIAIJ; 1863 B->factor = 0; 1864 B->assembled = PETSC_FALSE; 1865 B->mapping = 0; 1866 1867 B->insertmode = NOT_SET_VALUES; 1868 b->size = size; 1869 MPI_Comm_rank(comm,&b->rank); 1870 1871 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1872 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1873 } 1874 1875 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1876 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1877 b->m = m; B->m = m; 1878 b->n = n; B->n = n; 1879 b->N = N; B->N = N; 1880 b->M = M; B->M = M; 1881 1882 /* the information in the maps duplicates the information computed below, eventually 1883 we should remove the duplicate information that is not contained in the maps */ 1884 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1885 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1886 1887 /* build local table of row and column ownerships */ 1888 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1889 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1890 b->cowners = b->rowners + b->size + 2; 1891 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1892 b->rowners[0] = 0; 1893 for ( i=2; i<=b->size; i++ ) { 1894 b->rowners[i] += b->rowners[i-1]; 1895 } 1896 b->rstart = b->rowners[b->rank]; 1897 b->rend = b->rowners[b->rank+1]; 1898 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1899 b->cowners[0] = 0; 1900 for ( i=2; i<=b->size; i++ ) { 1901 b->cowners[i] += b->cowners[i-1]; 1902 } 1903 b->cstart = b->cowners[b->rank]; 1904 b->cend = b->cowners[b->rank+1]; 1905 1906 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1907 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1908 PLogObjectParent(B,b->A); 1909 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1910 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1911 PLogObjectParent(B,b->B); 1912 1913 /* build cache for off array entries formed */ 1914 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1915 b->donotstash = 0; 1916 b->colmap = 0; 1917 b->garray = 0; 1918 b->roworiented = 1; 1919 1920 /* stuff used for matrix vector multiply */ 1921 b->lvec = 0; 1922 b->Mvctx = 0; 1923 1924 /* stuff for MatGetRow() */ 1925 b->rowindices = 0; 1926 b->rowvalues = 0; 1927 b->getrowactive = PETSC_FALSE; 1928 1929 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1930 "MatStoreValues_MPIAIJ", 1931 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1932 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1933 "MatRetrieveValues_MPIAIJ", 1934 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1935 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1936 "MatGetDiagonalBlock_MPIAIJ", 1937 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1938 *A = B; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNC__ 1943 #define __FUNC__ "MatDuplicate_MPIAIJ" 1944 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1945 { 1946 Mat mat; 1947 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1948 int ierr, len=0, flg; 1949 1950 PetscFunctionBegin; 1951 *newmat = 0; 1952 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1953 PLogObjectCreate(mat); 1954 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1955 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1956 mat->ops->destroy = MatDestroy_MPIAIJ; 1957 mat->ops->view = MatView_MPIAIJ; 1958 mat->factor = matin->factor; 1959 mat->assembled = PETSC_TRUE; 1960 mat->insertmode = NOT_SET_VALUES; 1961 1962 a->m = mat->m = oldmat->m; 1963 a->n = mat->n = oldmat->n; 1964 a->M = mat->M = oldmat->M; 1965 a->N = mat->N = oldmat->N; 1966 1967 a->rstart = oldmat->rstart; 1968 a->rend = oldmat->rend; 1969 a->cstart = oldmat->cstart; 1970 a->cend = oldmat->cend; 1971 a->size = oldmat->size; 1972 a->rank = oldmat->rank; 1973 a->donotstash = oldmat->donotstash; 1974 a->roworiented = oldmat->roworiented; 1975 a->rowindices = 0; 1976 a->rowvalues = 0; 1977 a->getrowactive = PETSC_FALSE; 1978 1979 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1980 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1981 a->cowners = a->rowners + a->size + 2; 1982 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1983 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1984 if (oldmat->colmap) { 1985 #if defined (USE_CTABLE) 1986 ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr); 1987 #else 1988 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1989 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1990 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1991 #endif 1992 } else a->colmap = 0; 1993 if (oldmat->garray) { 1994 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1995 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1996 PLogObjectMemory(mat,len*sizeof(int)); 1997 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1998 } else a->garray = 0; 1999 2000 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2001 PLogObjectParent(mat,a->lvec); 2002 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2003 PLogObjectParent(mat,a->Mvctx); 2004 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2005 PLogObjectParent(mat,a->A); 2006 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2007 PLogObjectParent(mat,a->B); 2008 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2009 if (flg) { 2010 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2011 } 2012 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2013 *newmat = mat; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 #include "sys.h" 2018 2019 #undef __FUNC__ 2020 #define __FUNC__ "MatLoad_MPIAIJ" 2021 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2022 { 2023 Mat A; 2024 Scalar *vals,*svals; 2025 MPI_Comm comm = ((PetscObject)viewer)->comm; 2026 MPI_Status status; 2027 int i, nz, ierr, j,rstart, rend, fd; 2028 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2029 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2030 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2031 2032 PetscFunctionBegin; 2033 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2034 if (!rank) { 2035 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2036 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2037 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2038 if (header[3] < 0) { 2039 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2040 } 2041 } 2042 2043 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2044 M = header[1]; N = header[2]; 2045 /* determine ownership of all rows */ 2046 m = M/size + ((M % size) > rank); 2047 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2048 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2049 rowners[0] = 0; 2050 for ( i=2; i<=size; i++ ) { 2051 rowners[i] += rowners[i-1]; 2052 } 2053 rstart = rowners[rank]; 2054 rend = rowners[rank+1]; 2055 2056 /* distribute row lengths to all processors */ 2057 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2058 offlens = ourlens + (rend-rstart); 2059 if (!rank) { 2060 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2061 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2062 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2063 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2064 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2065 PetscFree(sndcounts); 2066 } else { 2067 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2068 } 2069 2070 if (!rank) { 2071 /* calculate the number of nonzeros on each processor */ 2072 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2073 PetscMemzero(procsnz,size*sizeof(int)); 2074 for ( i=0; i<size; i++ ) { 2075 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2076 procsnz[i] += rowlengths[j]; 2077 } 2078 } 2079 PetscFree(rowlengths); 2080 2081 /* determine max buffer needed and allocate it */ 2082 maxnz = 0; 2083 for ( i=0; i<size; i++ ) { 2084 maxnz = PetscMax(maxnz,procsnz[i]); 2085 } 2086 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2087 2088 /* read in my part of the matrix column indices */ 2089 nz = procsnz[0]; 2090 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2091 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2092 2093 /* read in every one elses and ship off */ 2094 for ( i=1; i<size; i++ ) { 2095 nz = procsnz[i]; 2096 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2097 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2098 } 2099 PetscFree(cols); 2100 } else { 2101 /* determine buffer space needed for message */ 2102 nz = 0; 2103 for ( i=0; i<m; i++ ) { 2104 nz += ourlens[i]; 2105 } 2106 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2107 2108 /* receive message of column indices*/ 2109 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2110 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2111 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2112 } 2113 2114 /* determine column ownership if matrix is not square */ 2115 if (N != M) { 2116 n = N/size + ((N % size) > rank); 2117 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2118 cstart = cend - n; 2119 } else { 2120 cstart = rstart; 2121 cend = rend; 2122 n = cend - cstart; 2123 } 2124 2125 /* loop over local rows, determining number of off diagonal entries */ 2126 PetscMemzero(offlens,m*sizeof(int)); 2127 jj = 0; 2128 for ( i=0; i<m; i++ ) { 2129 for ( j=0; j<ourlens[i]; j++ ) { 2130 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2131 jj++; 2132 } 2133 } 2134 2135 /* create our matrix */ 2136 for ( i=0; i<m; i++ ) { 2137 ourlens[i] -= offlens[i]; 2138 } 2139 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2140 A = *newmat; 2141 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2142 for ( i=0; i<m; i++ ) { 2143 ourlens[i] += offlens[i]; 2144 } 2145 2146 if (!rank) { 2147 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2148 2149 /* read in my part of the matrix numerical values */ 2150 nz = procsnz[0]; 2151 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2152 2153 /* insert into matrix */ 2154 jj = rstart; 2155 smycols = mycols; 2156 svals = vals; 2157 for ( i=0; i<m; i++ ) { 2158 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2159 smycols += ourlens[i]; 2160 svals += ourlens[i]; 2161 jj++; 2162 } 2163 2164 /* read in other processors and ship out */ 2165 for ( i=1; i<size; i++ ) { 2166 nz = procsnz[i]; 2167 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2168 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2169 } 2170 PetscFree(procsnz); 2171 } else { 2172 /* receive numeric values */ 2173 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2174 2175 /* receive message of values*/ 2176 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2177 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2178 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2179 2180 /* insert into matrix */ 2181 jj = rstart; 2182 smycols = mycols; 2183 svals = vals; 2184 for ( i=0; i<m; i++ ) { 2185 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2186 smycols += ourlens[i]; 2187 svals += ourlens[i]; 2188 jj++; 2189 } 2190 } 2191 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2192 2193 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2194 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 #undef __FUNC__ 2199 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2200 /* 2201 Not great since it makes two copies of the submatrix, first an SeqAIJ 2202 in local and then by concatenating the local matrices the end result. 2203 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2204 */ 2205 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2206 { 2207 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2208 Mat *local,M, Mreuse; 2209 Scalar *vwork,*aa; 2210 MPI_Comm comm = mat->comm; 2211 Mat_SeqAIJ *aij; 2212 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2213 2214 PetscFunctionBegin; 2215 MPI_Comm_rank(comm,&rank); 2216 MPI_Comm_size(comm,&size); 2217 2218 if (call == MAT_REUSE_MATRIX) { 2219 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2220 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2221 local = &Mreuse; 2222 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2223 } else { 2224 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2225 Mreuse = *local; 2226 PetscFree(local); 2227 } 2228 2229 /* 2230 m - number of local rows 2231 n - number of columns (same on all processors) 2232 rstart - first row in new global matrix generated 2233 */ 2234 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2235 if (call == MAT_INITIAL_MATRIX) { 2236 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2237 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2238 ii = aij->i; 2239 jj = aij->j; 2240 2241 /* 2242 Determine the number of non-zeros in the diagonal and off-diagonal 2243 portions of the matrix in order to do correct preallocation 2244 */ 2245 2246 /* first get start and end of "diagonal" columns */ 2247 if (csize == PETSC_DECIDE) { 2248 nlocal = n/size + ((n % size) > rank); 2249 } else { 2250 nlocal = csize; 2251 } 2252 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2253 rstart = rend - nlocal; 2254 if (rank == size - 1 && rend != n) { 2255 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2256 } 2257 2258 /* next, compute all the lengths */ 2259 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2260 olens = dlens + m; 2261 for ( i=0; i<m; i++ ) { 2262 jend = ii[i+1] - ii[i]; 2263 olen = 0; 2264 dlen = 0; 2265 for ( j=0; j<jend; j++ ) { 2266 if ( *jj < rstart || *jj >= rend) olen++; 2267 else dlen++; 2268 jj++; 2269 } 2270 olens[i] = olen; 2271 dlens[i] = dlen; 2272 } 2273 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2274 PetscFree(dlens); 2275 } else { 2276 int ml,nl; 2277 2278 M = *newmat; 2279 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2280 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2281 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2282 /* 2283 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2284 rather than the slower MatSetValues(). 2285 */ 2286 M->was_assembled = PETSC_TRUE; 2287 M->assembled = PETSC_FALSE; 2288 } 2289 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2290 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2291 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2292 ii = aij->i; 2293 jj = aij->j; 2294 aa = aij->a; 2295 for (i=0; i<m; i++) { 2296 row = rstart + i; 2297 nz = ii[i+1] - ii[i]; 2298 cwork = jj; jj += nz; 2299 vwork = aa; aa += nz; 2300 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2301 } 2302 2303 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2304 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2305 *newmat = M; 2306 2307 /* save submatrix used in processor for next request */ 2308 if (call == MAT_INITIAL_MATRIX) { 2309 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2310 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2311 } 2312 2313 PetscFunctionReturn(0); 2314 } 2315