1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.285 1999/02/26 17:08:13 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i; 28 #if defined (USE_CTABLE) 29 int ierr; 30 #endif 31 32 PetscFunctionBegin; 33 #if defined (USE_CTABLE) 34 ierr = TableCreate(aij->n/5,&aij->colmap); CHKERRQ(ierr); 35 for ( i=0; i<n; i++ ){ 36 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 37 } 38 #else 39 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 40 PLogObjectMemory(mat,aij->N*sizeof(int)); 41 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 42 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 43 #endif 44 PetscFunctionReturn(0); 45 } 46 47 #define CHUNKSIZE 15 48 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 49 { \ 50 \ 51 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 52 rmax = aimax[row]; nrow = ailen[row]; \ 53 col1 = col - shift; \ 54 \ 55 low = 0; high = nrow; \ 56 while (high-low > 5) { \ 57 t = (low+high)/2; \ 58 if (rp[t] > col) high = t; \ 59 else low = t; \ 60 } \ 61 for ( _i=0; 
_i<nrow; _i++ ) { \ 62 if (rp[_i] > col1) break; \ 63 if (rp[_i] == col1) { \ 64 if (addv == ADD_VALUES) ap[_i] += value; \ 65 else ap[_i] = value; \ 66 goto a_noinsert; \ 67 } \ 68 } \ 69 if (nonew == 1) goto a_noinsert; \ 70 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 71 if (nrow >= rmax) { \ 72 /* there is no extra room in row, therefore enlarge */ \ 73 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 74 Scalar *new_a; \ 75 \ 76 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 77 \ 78 /* malloc new storage space */ \ 79 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 80 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 81 new_j = (int *) (new_a + new_nz); \ 82 new_i = new_j + new_nz; \ 83 \ 84 /* copy over old data into new slots */ \ 85 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 86 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 87 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 88 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 89 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 90 len*sizeof(int)); \ 91 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 92 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 93 len*sizeof(Scalar)); \ 94 /* free up old matrix storage */ \ 95 \ 96 PetscFree(a->a); \ 97 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 
ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int)); \ 162 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 163 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar)); \ 165 /* free up old matrix storage */ \ 166 \ 167 PetscFree(b->a); \ 168 if 
(!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 169 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 170 b->singlemalloc = 1; \ 171 \ 172 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 173 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 174 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 175 b->maxnz += CHUNKSIZE; \ 176 b->reallocs++; \ 177 } \ 178 N = nrow++ - 1; b->nz++; \ 179 /* shift up all the later entries in this row */ \ 180 for ( ii=N; ii>=_i; ii-- ) { \ 181 rp[ii+1] = rp[ii]; \ 182 ap[ii+1] = ap[ii]; \ 183 } \ 184 rp[_i] = col1; \ 185 ap[_i] = value; \ 186 b_noinsert: ; \ 187 bilen[row] = nrow; \ 188 } 189 190 #undef __FUNC__ 191 #define __FUNC__ "MatSetValues_MPIAIJ" 192 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 193 { 194 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 195 Scalar value; 196 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 197 int cstart = aij->cstart, cend = aij->cend,row,col; 198 int roworiented = aij->roworiented; 199 200 /* Some Variables required in the macro */ 201 Mat A = aij->A; 202 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 203 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 204 Scalar *aa = a->a; 205 206 Mat B = aij->B; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 208 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 209 Scalar *ba = b->a; 210 211 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 212 int nonew = a->nonew,shift = a->indexshift; 213 Scalar *ap; 214 215 PetscFunctionBegin; 216 for ( i=0; i<m; i++ ) { 217 if (im[i] < 0) continue; 218 #if defined(USE_PETSC_BOPT_g) 219 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 220 #endif 221 if (im[i] >= rstart && im[i] < rend) { 222 row = im[i] - rstart; 223 for ( j=0; j<n; j++ ) { 224 if (in[j] >= cstart && in[j] < cend){ 225 col = in[j] - cstart; 226 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 227 
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* off-diagonal entry: translate the global column to the local
             column of B through colmap (built lazily on first use) */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col); CHKERRQ(ierr);
            col--;  /* colmap stores local index + 1; 0 means "not present" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* column not in B's current nonzero pattern and new nonzeros are
               allowed: fall back to the disassembled (global-column) form */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];  /* before first assembly B uses global columns */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash (unless stashing is disabled) */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            /* column-oriented input: stash one value at a time */
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values (row-major into v).  Only locally
   owned rows may be queried; other rows raise PETSC_ERR_SUP.  Columns that
   fall outside both the diagonal block and B's nonzero pattern return 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col); CHKERRQ(ierr);
          col --;  /* colmap stores local index + 1 */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale/aliased colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
/* NOTE(review): the following assembly implementation is compiled only when
   __JUNK__ is defined (presumably never in a normal build — confirm); the
   active implementations live in the #else branch further below. */
#if defined (__JUNK__)

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/* (old implementation) hand-rolled stash exchange: posts nonblocking
   receives/sends of (row,col,value) triples packed as Scalars */
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* linear ownership search over the row-range table */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    /* pack (row,col,value) triples; starts[] is consumed as a cursor here */
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (it was destroyed while packing above) */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);
  PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashReset_Private(&aij->stash); CHKERRQ(ierr);

  /* stash communication state for MatAssemblyEnd to wait on */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/* (old implementation) drains the receives posted by MatAssemblyBegin and
   inserts the unpacked (row,col,value) triples locally */
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;  /* n Scalars received = n/3 (row,col,value) triples */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(aij->colmap,col+1,&col); CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[col] - 1;
#endif
          /* unknown column and new nonzeros allowed: disassemble and use
             the global column index instead */
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (aij->recv_waits) PetscFree(aij->recv_waits);
  if (aij->rvalues)    PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (aij->send_waits) PetscFree(aij->send_waits);
  if (aij->svalues)    PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#else

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   Active implementation: verifies insert-mode agreement across the
   communicator and starts the stash scatter of off-processor values.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = StashScatterBegin_Private(&aij->stash,aij->rowners); CHKERRQ(ierr);
  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   Active implementation: finishes the stash scatter, inserts received
   values (batched per row), assembles A and B, handles the collective
   disassemble decision and builds the multiply machinery on final assembly.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         imdex,i,j,rstart,ncols,n,ierr;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    ierr = StashScatterEnd_Private(&aij->stash); CHKERRQ(ierr);
    for (imdex=0; imdex<aij->stash.nrecvs; imdex++ ) {
      n   = aij->stash.rdata[imdex].n;
      row = aij->stash.rdata[imdex].i;
      col = aij->stash.rdata[imdex].j;
      val = aij->stash.rdata[imdex].a;
      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashReset_Private(&aij->stash); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#endif

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/* Zeroes all entries by zeroing both the diagonal and off-diagonal blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the (globally indexed) rows in `is`, optionally placing `diag` on
   the diagonal.  Row indices may refer to rows owned by other processors;
   they are routed to their owners with nonblocking MPI point-to-point
   messages before the local rows are zapped.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int));
  procs = nprocs + size;  /* second half of nprocs[] doubles as 0/1 flags */
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];  /* starts[] consumed as cursor */
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (destroyed by the packing loop above) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];  /* first globally-numbered row owned locally */

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    /* NOTE(review): source[] is recorded but never read afterwards in this
       routine — appears to be leftover bookkeeping; confirm before removing */
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;  /* convert to local row numbers */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: must insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits);  PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
/*
   y = A*x.  Overlaps the scatter of the ghost values (into a->lvec) with
   the diagonal-block multiply, then adds the off-diagonal contribution.
*/
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
/* z = y + A*x, with the same communication/computation overlap as MatMult. */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
/* y = A^T*x: off-diagonal transpose contributions are scattered back
   (reverse scatter, ADD_VALUES) while the local transpose multiply runs. */
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* z = y + A^T*x, structured like MatMultTrans above. */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
/* Scales the matrix by *aa, applying the scale to both local blocks. */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__
"MatDestroy_MPIAIJ" 873 int MatDestroy_MPIAIJ(Mat mat) 874 { 875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 876 int ierr; 877 878 PetscFunctionBegin; 879 if (--mat->refct > 0) PetscFunctionReturn(0); 880 881 if (mat->mapping) { 882 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 883 } 884 if (mat->bmapping) { 885 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 886 } 887 if (mat->rmap) { 888 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 889 } 890 if (mat->cmap) { 891 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 892 } 893 #if defined(USE_PETSC_LOG) 894 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 895 #endif 896 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 897 PetscFree(aij->rowners); 898 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 899 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 900 #if defined (USE_CTABLE) 901 if (aij->colmap) TableDelete(aij->colmap); 902 #else 903 if (aij->colmap) PetscFree(aij->colmap); 904 #endif 905 if (aij->garray) PetscFree(aij->garray); 906 if (aij->lvec) VecDestroy(aij->lvec); 907 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 908 if (aij->rowvalues) PetscFree(aij->rowvalues); 909 PetscFree(aij); 910 PLogObjectDestroy(mat); 911 PetscHeaderDestroy(mat); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNC__ 916 #define __FUNC__ "MatView_MPIAIJ_Binary" 917 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 918 { 919 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 920 int ierr; 921 922 PetscFunctionBegin; 923 if (aij->size == 1) { 924 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 925 } 926 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNC__ 931 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 932 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 933 { 934 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 935 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 936 int ierr, format,shift = C->indexshift,rank 
= aij->rank ; 937 int size = aij->size; 938 FILE *fd; 939 ViewerType vtype; 940 941 PetscFunctionBegin; 942 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 943 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 944 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 945 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 946 MatInfo info; 947 int flg; 948 MPI_Comm_rank(mat->comm,&rank); 949 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 950 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 951 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 952 PetscSequentialPhaseBegin(mat->comm,1); 953 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 954 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 955 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 956 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 957 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 958 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 959 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 960 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 961 fflush(fd); 962 PetscSequentialPhaseEnd(mat->comm,1); 963 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 966 PetscFunctionReturn(0); 967 } 968 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 969 Draw draw; 970 PetscTruth isnull; 971 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 972 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 973 } 974 975 if (size == 1) { 976 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 977 } else { 978 /* assemble the entire matrix onto first processor. 
*/ 979 Mat A; 980 Mat_SeqAIJ *Aloc; 981 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 982 Scalar *a; 983 984 if (!rank) { 985 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 986 } else { 987 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 988 } 989 PLogObjectParent(mat,A); 990 991 /* copy over the A part */ 992 Aloc = (Mat_SeqAIJ*) aij->A->data; 993 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 994 row = aij->rstart; 995 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 996 for ( i=0; i<m; i++ ) { 997 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 998 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 999 } 1000 aj = Aloc->j; 1001 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 1002 1003 /* copy over the B part */ 1004 Aloc = (Mat_SeqAIJ*) aij->B->data; 1005 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1006 row = aij->rstart; 1007 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 1008 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 1009 for ( i=0; i<m; i++ ) { 1010 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1011 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1012 } 1013 PetscFree(ct); 1014 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1015 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1016 /* 1017 Everyone has to call to draw the matrix since the graphics waits are 1018 synchronized across all processors that share the Draw object 1019 */ 1020 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 1021 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 1022 } 1023 ierr = MatDestroy(A); CHKERRQ(ierr); 1024 } 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNC__ 1029 #define __FUNC__ "MatView_MPIAIJ" 1030 int MatView_MPIAIJ(Mat mat,Viewer viewer) 1031 { 1032 int ierr; 1033 
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
      PetscTypeCompare(vtype,SOCKET_VIEWER)) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating out values between colors.
*/
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/Gauss-Seidel relaxation. Only the "local" variants
   are supported: each process sweeps over its own rows using ghost values
   (mat->lvec) for the off-diagonal part; ghost values are refreshed once
   per call, not per inner iteration. True parallel SOR is unsupported.
*/
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,&x); CHKERRQ(ierr);
  ierr = VecGetArray(bb,&b); CHKERRQ(ierr);
  ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  /* build the diagonal index array lazily on first use */
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-diagonal contribution vanishes; delegate
         the whole sweep to the sequential diagonal block */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    /* refresh ghost values once, then sweep locally */
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        /* sum -= (row of A) . xs  (SPARSEDENSEMDOT accumulates the dot product) */
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        /* weighted SOR update; the diag term is added back since the dot
           products above ran over the full rows including the diagonal */
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        /* NOTE(review): missing ';' after this PLogFlops in the original;
           relies on the macro expanding to a braced block — confirm */
        PLogFlops(4*n+3)
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* forward sweep only */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
      ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* backward sweep only */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr);
  ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr);
  ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and
   off-diagonal (B) blocks, then reduces across the communicator for the
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM flavors.
*/
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
info->factor_mallocs = 0; 1219 info->rows_global = (double)mat->M; 1220 info->columns_global = (double)mat->N; 1221 info->rows_local = (double)mat->m; 1222 info->columns_local = (double)mat->N; 1223 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNC__ 1228 #define __FUNC__ "MatSetOption_MPIAIJ" 1229 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1230 { 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1232 int ierr; 1233 1234 PetscFunctionBegin; 1235 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1236 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1237 op == MAT_COLUMNS_UNSORTED || 1238 op == MAT_COLUMNS_SORTED || 1239 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1240 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1243 } else if (op == MAT_ROW_ORIENTED) { 1244 a->roworiented = 1; 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1247 } else if (op == MAT_ROWS_SORTED || 1248 op == MAT_ROWS_UNSORTED || 1249 op == MAT_SYMMETRIC || 1250 op == MAT_STRUCTURALLY_SYMMETRIC || 1251 op == MAT_YES_NEW_DIAGONALS) 1252 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1253 else if (op == MAT_COLUMN_ORIENTED) { 1254 a->roworiented = 0; 1255 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1256 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1257 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1258 a->donotstash = 1; 1259 } else if (op == MAT_NO_NEW_DIAGONALS){ 1260 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1261 } else { 1262 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1263 } 1264 PetscFunctionReturn(0); 1265 } 1266 1267 #undef __FUNC__ 1268 #define __FUNC__ "MatGetSize_MPIAIJ" 1269 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1270 { 1271 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1272 1273 PetscFunctionBegin; 1274 if (m) *m = mat->M; 1275 if (n) *n = mat->N; 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNC__ 1280 #define __FUNC__ 
"MatGetLocalSize_MPIAIJ" 1281 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1282 { 1283 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1284 1285 PetscFunctionBegin; 1286 if (m) *m = mat->m; 1287 if (n) *n = mat->n; 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNC__ 1292 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1293 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1294 { 1295 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1296 1297 PetscFunctionBegin; 1298 *m = mat->rstart; *n = mat->rend; 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNC__ 1303 #define __FUNC__ "MatGetRow_MPIAIJ" 1304 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1305 { 1306 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1307 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1308 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1309 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1310 int *cmap, *idx_p; 1311 1312 PetscFunctionBegin; 1313 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1314 mat->getrowactive = PETSC_TRUE; 1315 1316 if (!mat->rowvalues && (idx || v)) { 1317 /* 1318 allocate enough space to hold information from the longest row. 
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    /* longest combined row = max over rows of (nnz in A row + nnz in B row) */
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both values and (after them) the indices */
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  /* NOTE(review): no ';' after this SETERRQ in the original — relies on the
     macro expanding to a braced block */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* request only what the caller asked for from the two blocks */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted.
         imark = number of B entries whose global column is left of the
         diagonal block; layout is [B-left | A | B-right]. */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A columns are local; add cstart to recover global numbering */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr =
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ;
   only clears the active flag (the work arrays are reused across calls).
*/
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or
   infinity- (max row sum) norm of the parallel matrix by combining the
   two local blocks and reducing over the communicator. NORM_1 allocates
   two arrays of global length N, which is not scalable.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares of all local entries (A then B), then global sum */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* accumulate |value| per global column: A columns offset by cstart */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ +
shift] += PetscAbsScalar(*v); v++;
      }
      /* B columns map through garray to global numbering */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      /* rows of A and B align one-to-one, so sum both per local row */
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose by inserting each local row
   as a column of a freshly created n x m parallel matrix. With
   matout == PETSC_NULL an (approximate) in-place transpose is done by
   destroying this matrix's data and adopting the new matrix's header.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* transpose has the local/global row and column sizes swapped */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part: shift columns to global numbering, insert each
     row as a column of B, then undo the shift */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array +=
ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part: map compressed columns through garray */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    /* adopt B's header wholesale but keep A's original op tables, so
       existing references to A keep working; then discard B's shell */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr). The right
   scaling of the off-diagonal block needs the ghost values of rr, so the
   scatter is started early and overlapped with the other scalings.
   Either vector may be PETSC_NULL to skip that side.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left scaling of the off-diagonal block uses only local values */
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/*
   MatPrintHelp_MPIAIJ - Prints the SeqAIJ option help, on rank 0 only.
*/
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* AIJ has no blocking; the block size is always 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/*
   MatSetUnfactored_MPIAIJ - Resets the factored state; only the diagonal
   block carries factorization state here.
*/
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Compares the local A and B blocks of the two matrices,
   then ANDs the per-process results across the communicator so every
   process returns the same answer.
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* short-circuit: only compare the off-diagonal blocks if the diagonal
     blocks already matched */
  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  /* NOTE(review): reduces a PetscTruth with MPI_INT/MPI_LAND — assumes
     PetscTruth is int-sized; confirm against its definition */
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B. With an identical nonzero pattern the
   two local blocks are copied directly; otherwise the generic (slow)
   MatCopy_Basic is used.
*/
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts.
       If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
/*
   Function table for the MPIAIJ matrix type. The slot ORDER is fixed by
   struct _MatOps; a 0 entry means that operation is unsupported. Do not
   reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
MatGetMaps_Petsc}; 1735 1736 /* ----------------------------------------------------------------------------------------*/ 1737 1738 EXTERN_C_BEGIN 1739 #undef __FUNC__ 1740 #define __FUNC__ "MatStoreValues_MPIAIJ" 1741 int MatStoreValues_MPIAIJ(Mat mat) 1742 { 1743 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1744 int ierr; 1745 1746 PetscFunctionBegin; 1747 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1748 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1749 PetscFunctionReturn(0); 1750 } 1751 EXTERN_C_END 1752 1753 EXTERN_C_BEGIN 1754 #undef __FUNC__ 1755 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1756 int MatRetrieveValues_MPIAIJ(Mat mat) 1757 { 1758 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1759 int ierr; 1760 1761 PetscFunctionBegin; 1762 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1763 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1764 PetscFunctionReturn(0); 1765 } 1766 EXTERN_C_END 1767 1768 #include "pc.h" 1769 EXTERN_C_BEGIN 1770 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1771 EXTERN_C_END 1772 1773 #undef __FUNC__ 1774 #define __FUNC__ "MatCreateMPIAIJ" 1775 /*@C 1776 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1777 (the default parallel PETSc format). For good matrix assembly performance 1778 the user should preallocate the matrix storage by setting the parameters 1779 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1780 performance can be increased by more than a factor of 50. 1781 1782 Collective on MPI_Comm 1783 1784 Input Parameters: 1785 + comm - MPI communicator 1786 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1787 This value should be the same as the local size used in creating the 1788 y vector for the matrix-vector product y = Ax. 1789 . n - This value should be the same as the local size used in creating the 1790 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 1791 calculated if N is given) For square matrices n is almost always m. 1792 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1793 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1794 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1795 (same value is used for all local rows) 1796 . d_nnz - array containing the number of nonzeros in the various rows of the 1797 DIAGONAL portion of the local submatrix (possibly different for each row) 1798 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1799 The size of this array is equal to the number of local rows, i.e 'm'. 1800 You must leave room for the diagonal entry even if it is zero. 1801 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1802 submatrix (same value is used for all local rows). 1803 - o_nnz - array containing the number of nonzeros in the various rows of the 1804 OFF-DIAGONAL portion of the local submatrix (possibly different for 1805 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1806 structure. The size of this array is equal to the number 1807 of local rows, i.e 'm'. 1808 1809 Output Parameter: 1810 . A - the matrix 1811 1812 Notes: 1813 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1814 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1815 storage requirements for this matrix. 1816 1817 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1818 processor than it must be used on all processors that share the object for 1819 that argument. 1820 1821 The AIJ format (also called the Yale sparse matrix format or 1822 compressed row storage), is fully compatible with standard Fortran 77 1823 storage. That is, the stored row and column indices can begin at 1824 either one (as in Fortran) or zero. See the users manual for details. 
1825 1826 The user MUST specify either the local or global matrix dimensions 1827 (possibly both). 1828 1829 The parallel matrix is partitioned such that the first m0 rows belong to 1830 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1831 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1832 1833 The DIAGONAL portion of the local submatrix of a processor can be defined 1834 as the submatrix which is obtained by extraction the part corresponding 1835 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1836 first row that belongs to the processor, and r2 is the last row belonging 1837 to the this processor. This is a square mxm matrix. The remaining portion 1838 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1839 1840 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1841 1842 By default, this format uses inodes (identical nodes) when possible. 1843 We search for consecutive rows with the same nonzero structure, thereby 1844 reusing matrix information to achieve increased efficiency. 1845 1846 Options Database Keys: 1847 + -mat_aij_no_inode - Do not use inodes 1848 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1849 - -mat_aij_oneindex - Internally use indexing starting at 1 1850 rather than 0. Note that when calling MatSetValues(), 1851 the user still MUST index entries starting at 0! 1852 . -mat_mpi - use the parallel matrix data structures even on one processor 1853 (defaults to using SeqBAIJ format on one processor) 1854 . -mat_mpi - use the parallel matrix data structures even on one processor 1855 (defaults to using SeqAIJ format on one processor) 1856 1857 1858 Example usage: 1859 1860 Consider the following 8x8 matrix with 34 non-zero values, that is 1861 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1862 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 1914 34 values. 1915 1916 When d_nnz, o_nnz parameters are specified, the storage is specified 1917 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1918 In the above case the values for d_nnz,o_nnz are: 1919 .vb 1920 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1921 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1922 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1923 .ve 1924 Here the space allocated is sum of all the above values i.e 34, and 1925 hence pre-allocation is perfect. 1926 1927 Level: intermediate 1928 1929 .keywords: matrix, aij, compressed row, sparse, parallel 1930 1931 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1932 @*/ 1933 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1934 { 1935 Mat B; 1936 Mat_MPIAIJ *b; 1937 int ierr, i,size,flag1 = 0, flag2 = 0; 1938 1939 PetscFunctionBegin; 1940 MPI_Comm_size(comm,&size); 1941 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1942 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1943 if (!flag1 && !flag2 && size == 1) { 1944 if (M == PETSC_DECIDE) M = m; 1945 if (N == PETSC_DECIDE) N = n; 1946 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 *A = 0; 1951 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1952 PLogObjectCreate(B); 1953 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1954 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1955 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1956 B->ops->destroy = MatDestroy_MPIAIJ; 1957 B->ops->view = MatView_MPIAIJ; 1958 B->factor = 0; 1959 B->assembled = PETSC_FALSE; 1960 B->mapping = 0; 1961 1962 B->insertmode = NOT_SET_VALUES; 1963 b->size = size; 1964 MPI_Comm_rank(comm,&b->rank); 1965 1966 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1967 
SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1968 } 1969 1970 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1971 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1972 b->m = m; B->m = m; 1973 b->n = n; B->n = n; 1974 b->N = N; B->N = N; 1975 b->M = M; B->M = M; 1976 1977 /* the information in the maps duplicates the information computed below, eventually 1978 we should remove the duplicate information that is not contained in the maps */ 1979 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1980 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1981 1982 /* build local table of row and column ownerships */ 1983 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1984 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1985 b->cowners = b->rowners + b->size + 2; 1986 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1987 b->rowners[0] = 0; 1988 for ( i=2; i<=b->size; i++ ) { 1989 b->rowners[i] += b->rowners[i-1]; 1990 } 1991 b->rstart = b->rowners[b->rank]; 1992 b->rend = b->rowners[b->rank+1]; 1993 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1994 b->cowners[0] = 0; 1995 for ( i=2; i<=b->size; i++ ) { 1996 b->cowners[i] += b->cowners[i-1]; 1997 } 1998 b->cstart = b->cowners[b->rank]; 1999 b->cend = b->cowners[b->rank+1]; 2000 2001 if (d_nz == PETSC_DEFAULT) d_nz = 5; 2002 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 2003 PLogObjectParent(B,b->A); 2004 if (o_nz == PETSC_DEFAULT) o_nz = 0; 2005 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 2006 PLogObjectParent(B,b->B); 2007 2008 /* build cache for off array entries formed */ 2009 ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr); 2010 b->donotstash = 0; 2011 b->colmap = 0; 2012 b->garray = 0; 2013 b->roworiented = 1; 2014 2015 /* stuff used for matrix 
vector multiply */ 2016 b->lvec = 0; 2017 b->Mvctx = 0; 2018 2019 /* stuff for MatGetRow() */ 2020 b->rowindices = 0; 2021 b->rowvalues = 0; 2022 b->getrowactive = PETSC_FALSE; 2023 2024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 2025 "MatStoreValues_MPIAIJ", 2026 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 2027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 2028 "MatRetrieveValues_MPIAIJ", 2029 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 2030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 2031 "MatGetDiagonalBlock_MPIAIJ", 2032 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 2033 *A = B; 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNC__ 2038 #define __FUNC__ "MatDuplicate_MPIAIJ" 2039 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2040 { 2041 Mat mat; 2042 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 2043 int ierr, len=0, flg; 2044 2045 PetscFunctionBegin; 2046 *newmat = 0; 2047 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 2048 PLogObjectCreate(mat); 2049 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 2050 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 2051 mat->ops->destroy = MatDestroy_MPIAIJ; 2052 mat->ops->view = MatView_MPIAIJ; 2053 mat->factor = matin->factor; 2054 mat->assembled = PETSC_TRUE; 2055 mat->insertmode = NOT_SET_VALUES; 2056 2057 a->m = mat->m = oldmat->m; 2058 a->n = mat->n = oldmat->n; 2059 a->M = mat->M = oldmat->M; 2060 a->N = mat->N = oldmat->N; 2061 2062 a->rstart = oldmat->rstart; 2063 a->rend = oldmat->rend; 2064 a->cstart = oldmat->cstart; 2065 a->cend = oldmat->cend; 2066 a->size = oldmat->size; 2067 a->rank = oldmat->rank; 2068 a->donotstash = oldmat->donotstash; 2069 a->roworiented = oldmat->roworiented; 2070 a->rowindices = 0; 2071 a->rowvalues = 0; 2072 a->getrowactive = PETSC_FALSE; 2073 2074 a->rowners = (int *) 
PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 2075 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 2076 a->cowners = a->rowners + a->size + 2; 2077 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 2078 ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr); 2079 if (oldmat->colmap) { 2080 #if defined (USE_CTABLE) 2081 ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr); 2082 #else 2083 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 2084 PLogObjectMemory(mat,(a->N)*sizeof(int)); 2085 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 2086 #endif 2087 } else a->colmap = 0; 2088 if (oldmat->garray) { 2089 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 2090 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 2091 PLogObjectMemory(mat,len*sizeof(int)); 2092 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2093 } else a->garray = 0; 2094 2095 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2096 PLogObjectParent(mat,a->lvec); 2097 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2098 PLogObjectParent(mat,a->Mvctx); 2099 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2100 PLogObjectParent(mat,a->A); 2101 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2102 PLogObjectParent(mat,a->B); 2103 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2104 if (flg) { 2105 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2106 } 2107 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2108 *newmat = mat; 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #include "sys.h" 2113 2114 #undef __FUNC__ 2115 #define __FUNC__ "MatLoad_MPIAIJ" 2116 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2117 { 2118 Mat A; 2119 Scalar *vals,*svals; 2120 MPI_Comm comm = ((PetscObject)viewer)->comm; 2121 MPI_Status status; 2122 int i, nz, ierr, j,rstart, rend, fd; 2123 
int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2124 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2125 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2126 2127 PetscFunctionBegin; 2128 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2129 if (!rank) { 2130 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2131 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2132 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2133 if (header[3] < 0) { 2134 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2135 } 2136 } 2137 2138 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2139 M = header[1]; N = header[2]; 2140 /* determine ownership of all rows */ 2141 m = M/size + ((M % size) > rank); 2142 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2143 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2144 rowners[0] = 0; 2145 for ( i=2; i<=size; i++ ) { 2146 rowners[i] += rowners[i-1]; 2147 } 2148 rstart = rowners[rank]; 2149 rend = rowners[rank+1]; 2150 2151 /* distribute row lengths to all processors */ 2152 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2153 offlens = ourlens + (rend-rstart); 2154 if (!rank) { 2155 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2156 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2157 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2158 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2159 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2160 PetscFree(sndcounts); 2161 } else { 2162 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2163 } 2164 2165 if (!rank) { 2166 /* calculate the number of nonzeros on each 
processor */ 2167 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2168 PetscMemzero(procsnz,size*sizeof(int)); 2169 for ( i=0; i<size; i++ ) { 2170 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2171 procsnz[i] += rowlengths[j]; 2172 } 2173 } 2174 PetscFree(rowlengths); 2175 2176 /* determine max buffer needed and allocate it */ 2177 maxnz = 0; 2178 for ( i=0; i<size; i++ ) { 2179 maxnz = PetscMax(maxnz,procsnz[i]); 2180 } 2181 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2182 2183 /* read in my part of the matrix column indices */ 2184 nz = procsnz[0]; 2185 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2186 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2187 2188 /* read in every one elses and ship off */ 2189 for ( i=1; i<size; i++ ) { 2190 nz = procsnz[i]; 2191 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2192 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2193 } 2194 PetscFree(cols); 2195 } else { 2196 /* determine buffer space needed for message */ 2197 nz = 0; 2198 for ( i=0; i<m; i++ ) { 2199 nz += ourlens[i]; 2200 } 2201 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2202 2203 /* receive message of column indices*/ 2204 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2205 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2206 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2207 } 2208 2209 /* determine column ownership if matrix is not square */ 2210 if (N != M) { 2211 n = N/size + ((N % size) > rank); 2212 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2213 cstart = cend - n; 2214 } else { 2215 cstart = rstart; 2216 cend = rend; 2217 n = cend - cstart; 2218 } 2219 2220 /* loop over local rows, determining number of off diagonal entries */ 2221 PetscMemzero(offlens,m*sizeof(int)); 2222 jj = 0; 2223 for ( i=0; i<m; i++ ) { 2224 for ( j=0; j<ourlens[i]; j++ ) 
{ 2225 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2226 jj++; 2227 } 2228 } 2229 2230 /* create our matrix */ 2231 for ( i=0; i<m; i++ ) { 2232 ourlens[i] -= offlens[i]; 2233 } 2234 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2235 A = *newmat; 2236 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2237 for ( i=0; i<m; i++ ) { 2238 ourlens[i] += offlens[i]; 2239 } 2240 2241 if (!rank) { 2242 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2243 2244 /* read in my part of the matrix numerical values */ 2245 nz = procsnz[0]; 2246 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2247 2248 /* insert into matrix */ 2249 jj = rstart; 2250 smycols = mycols; 2251 svals = vals; 2252 for ( i=0; i<m; i++ ) { 2253 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2254 smycols += ourlens[i]; 2255 svals += ourlens[i]; 2256 jj++; 2257 } 2258 2259 /* read in other processors and ship out */ 2260 for ( i=1; i<size; i++ ) { 2261 nz = procsnz[i]; 2262 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2263 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2264 } 2265 PetscFree(procsnz); 2266 } else { 2267 /* receive numeric values */ 2268 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2269 2270 /* receive message of values*/ 2271 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2272 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2273 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2274 2275 /* insert into matrix */ 2276 jj = rstart; 2277 smycols = mycols; 2278 svals = vals; 2279 for ( i=0; i<m; i++ ) { 2280 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2281 smycols += ourlens[i]; 2282 svals += ourlens[i]; 2283 jj++; 2284 } 2285 } 2286 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); 
PetscFree(rowners); 2287 2288 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2289 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNC__ 2294 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2295 /* 2296 Not great since it makes two copies of the submatrix, first an SeqAIJ 2297 in local and then by concatenating the local matrices the end result. 2298 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2299 */ 2300 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2301 { 2302 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2303 Mat *local,M, Mreuse; 2304 Scalar *vwork,*aa; 2305 MPI_Comm comm = mat->comm; 2306 Mat_SeqAIJ *aij; 2307 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2308 2309 PetscFunctionBegin; 2310 MPI_Comm_rank(comm,&rank); 2311 MPI_Comm_size(comm,&size); 2312 2313 if (call == MAT_REUSE_MATRIX) { 2314 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2315 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2316 local = &Mreuse; 2317 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2318 } else { 2319 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2320 Mreuse = *local; 2321 PetscFree(local); 2322 } 2323 2324 /* 2325 m - number of local rows 2326 n - number of columns (same on all processors) 2327 rstart - first row in new global matrix generated 2328 */ 2329 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2330 if (call == MAT_INITIAL_MATRIX) { 2331 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2332 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2333 ii = aij->i; 2334 jj = aij->j; 2335 2336 /* 2337 Determine the number of non-zeros in the diagonal and off-diagonal 2338 portions of the matrix in order to do correct preallocation 2339 */ 
2340 2341 /* first get start and end of "diagonal" columns */ 2342 if (csize == PETSC_DECIDE) { 2343 nlocal = n/size + ((n % size) > rank); 2344 } else { 2345 nlocal = csize; 2346 } 2347 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2348 rstart = rend - nlocal; 2349 if (rank == size - 1 && rend != n) { 2350 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2351 } 2352 2353 /* next, compute all the lengths */ 2354 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2355 olens = dlens + m; 2356 for ( i=0; i<m; i++ ) { 2357 jend = ii[i+1] - ii[i]; 2358 olen = 0; 2359 dlen = 0; 2360 for ( j=0; j<jend; j++ ) { 2361 if ( *jj < rstart || *jj >= rend) olen++; 2362 else dlen++; 2363 jj++; 2364 } 2365 olens[i] = olen; 2366 dlens[i] = dlen; 2367 } 2368 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2369 PetscFree(dlens); 2370 } else { 2371 int ml,nl; 2372 2373 M = *newmat; 2374 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2375 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2376 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2377 /* 2378 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2379 rather than the slower MatSetValues(). 
2380 */ 2381 M->was_assembled = PETSC_TRUE; 2382 M->assembled = PETSC_FALSE; 2383 } 2384 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2385 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2386 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2387 ii = aij->i; 2388 jj = aij->j; 2389 aa = aij->a; 2390 for (i=0; i<m; i++) { 2391 row = rstart + i; 2392 nz = ii[i+1] - ii[i]; 2393 cwork = jj; jj += nz; 2394 vwork = aa; aa += nz; 2395 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2396 } 2397 2398 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2399 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2400 *newmat = M; 2401 2402 /* save submatrix used in processor for next request */ 2403 if (call == MAT_INITIAL_MATRIX) { 2404 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2405 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2406 } 2407 2408 PetscFunctionReturn(0); 2409 } 2410