1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.264 1998/10/19 22:17:56 bsmith Exp balay $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } else { 282 if (!aij->colmap) { 283 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 284 } 285 col = aij->colmap[idxn[j]] - 1; 286 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 287 else { 288 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 289 } 290 } 291 } 292 } else { 293 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 294 } 295 } 296 PetscFunctionReturn(0); 297 } 298 299 #undef __FUNC__ 300 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 301 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 304 MPI_Comm comm = mat->comm; 305 int size = aij->size, *owners = aij->rowners; 306 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 307 MPI_Request *send_waits,*recv_waits; 308 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 309 InsertMode addv; 310 Scalar *rvalues,*svalues; 311 312 PetscFunctionBegin; 313 if (aij->donotstash) { 314 aij->svalues = 0; aij->rvalues = 0; 315 aij->nsends = 0; aij->nrecvs = 0; 316 aij->send_waits = 0; aij->recv_waits = 0; 317 aij->rmax = 0; 318 PetscFunctionReturn(0); 319 } 320 321 /* make sure all processors are either in INSERTMODE or ADDMODE */ 322 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 323 if (addv == (ADD_VALUES|INSERT_VALUES)) { 324 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 325 } 326 mat->insertmode = addv; /* in case this processor had no cache */ 327 328 /* first count number of contributors to each processor */ 329 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 330 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 331 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 332 for ( i=0; i<aij->stash.n; i++ ) { 333 idx = aij->stash.idx[i]; 334 for ( j=0; j<size; j++ ) { 335 if (idx >= owners[j] && idx < owners[j+1]) { 336 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 337 } 338 } 339 } 340 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 341 342 /* inform other processors of number of messages and max length*/ 343 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 344 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 345 nreceives = work[rank]; 346 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 347 nmax = work[rank]; 348 PetscFree(work); 349 350 /* post receives: 351 1) each message will consist of ordered pairs 352 (global index,value) we store the global index as a double 353 to simplify the message passing. 354 2) since we don't know how long each individual message is we 355 allocate the largest needed buffer for each receive. Potentially 356 this is a lot of wasted space. 357 358 359 This could be done better. 360 */ 361 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 362 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 363 for ( i=0; i<nreceives; i++ ) { 364 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 365 comm,recv_waits+i);CHKERRQ(ierr); 366 } 367 368 /* do sends: 369 1) starts[i] gives the starting index in svalues for stuff going to 370 the ith processor 371 */ 372 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 373 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 374 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 375 starts[0] = 0; 376 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 377 for ( i=0; i<aij->stash.n; i++ ) { 378 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 379 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 380 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 381 } 382 PetscFree(owner); 383 starts[0] = 0; 384 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 385 count = 0; 386 for ( i=0; i<size; i++ ) { 387 if (procs[i]) { 388 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 389 } 390 } 391 PetscFree(starts); 392 PetscFree(nprocs); 393 394 /* Free cache space */ 395 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 396 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 397 398 aij->svalues = svalues; aij->rvalues = rvalues; 399 aij->nsends = nsends; aij->nrecvs = nreceives; 400 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 401 aij->rmax = nmax; 402 403 PetscFunctionReturn(0); 404 } 405 extern int MatSetUpMultiply_MPIAIJ(Mat); 406 407 #undef __FUNC__ 408 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 409 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 412 MPI_Status *send_status,recv_status; 413 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 414 int row,col,other_disassembled; 415 Scalar *values,val; 416 InsertMode addv = mat->insertmode; 417 418 PetscFunctionBegin; 419 420 /* wait on receives */ 421 while (count) { 422 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 423 /* unpack receives into our local space */ 424 values = aij->rvalues + 3*imdex*aij->rmax; 425 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 426 n = n/3; 427 for ( i=0; i<n; i++ ) { 428 row = (int) PetscReal(values[3*i]) - aij->rstart; 429 col = (int) PetscReal(values[3*i+1]); 430 val = values[3*i+2]; 431 if (col >= aij->cstart && col < aij->cend) { 432 col -= aij->cstart; 433 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 434 } else { 435 if (mat->was_assembled || mat->assembled) { 436 if (!aij->colmap) { 437 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 438 } 439 col = aij->colmap[col] - 1; 440 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 441 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 442 col = (int) PetscReal(values[3*i+1]); 443 } 444 } 445 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 446 } 447 } 448 count--; 449 } 450 if (aij->recv_waits) PetscFree(aij->recv_waits); 451 if (aij->rvalues) PetscFree(aij->rvalues); 452 453 /* wait on sends */ 454 if (aij->nsends) { 455 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 456 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 457 PetscFree(send_status); 458 } 459 if (aij->send_waits) PetscFree(aij->send_waits); 460 if (aij->svalues) PetscFree(aij->svalues); 461 462 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 463 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 464 465 /* determine if any processor has disassembled, if so we must 466 also disassemble ourselfs, in order that we may reassemble. */ 467 /* 468 if nonzero structure of submatrix B cannot change then we know that 469 no processor disassembled thus we can skip this stuff 470 */ 471 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 472 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 473 if (mat->was_assembled && !other_disassembled) { 474 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 475 } 476 } 477 478 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 479 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 480 } 481 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 482 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 483 484 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ "MatZeroEntries_MPIAIJ" 490 int MatZeroEntries_MPIAIJ(Mat A) 491 { 492 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 493 int ierr; 494 495 PetscFunctionBegin; 496 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 497 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNC__ 502 #define __FUNC__ "MatZeroRows_MPIAIJ" 503 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 504 { 505 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 506 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 507 int *procs,*nprocs,j,found,idx,nsends,*work,row; 508 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 509 int *rvalues,tag = A->tag,count,base,slen,n,*source; 510 int *lens,imdex,*lrows,*values,rstart=l->rstart; 511 MPI_Comm comm = A->comm; 512 MPI_Request *send_waits,*recv_waits; 513 MPI_Status recv_status,*send_status; 514 IS istmp; 515 PetscTruth localdiag; 516 517 PetscFunctionBegin; 518 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 519 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 520 521 /* first count number of contributors to each processor */ 522 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 523 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 524 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 525 for ( i=0; i<N; i++ ) { 526 idx = rows[i]; 527 found = 0; 528 for ( j=0; j<size; j++ ) { 529 if (idx >= owners[j] && idx < owners[j+1]) { 530 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 531 } 532 } 533 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 534 } 535 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 536 537 /* inform other processors of number of messages and max length*/ 538 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 539 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 540 nrecvs = work[rank]; 541 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 542 nmax = work[rank]; 543 PetscFree(work); 544 545 /* post receives: */ 546 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 547 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 548 for ( i=0; i<nrecvs; i++ ) { 549 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 550 } 551 552 /* do sends: 553 1) starts[i] gives the starting index in svalues for stuff going to 554 the ith processor 555 */ 556 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 557 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 558 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 559 starts[0] = 0; 560 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 561 for ( i=0; i<N; i++ ) { 562 svalues[starts[owner[i]]++] = rows[i]; 563 } 564 ISRestoreIndices(is,&rows); 565 566 starts[0] = 0; 567 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 568 count = 0; 569 for ( i=0; i<size; i++ ) { 570 if (procs[i]) { 571 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 572 } 573 } 574 PetscFree(starts); 575 576 base = owners[rank]; 577 578 /* wait on receives */ 579 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 580 source = lens + nrecvs; 581 count = nrecvs; slen = 0; 582 while (count) { 583 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 584 /* unpack receives into our local space */ 585 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 586 source[imdex] = recv_status.MPI_SOURCE; 587 lens[imdex] = n; 588 slen += n; 589 count--; 590 } 591 PetscFree(recv_waits); 592 593 /* move the data into the send scatter */ 594 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 595 count = 0; 596 for ( i=0; i<nrecvs; i++ ) { 597 values = rvalues + i*nmax; 598 for ( j=0; j<lens[i]; j++ ) { 599 lrows[count++] = values[j] - base; 600 } 601 } 602 PetscFree(rvalues); PetscFree(lens); 603 PetscFree(owner); PetscFree(nprocs); 604 605 /* actually zap the local rows */ 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 607 PLogObjectParent(A,istmp); 608 609 /* 610 Zero the required rows. If the "diagonal block" of the matrix 611 is square and the user wishes to set the diagonal we use seperate 612 code so that MatSetValues() is not called for each diagonal allocating 613 new memory, thus calling lots of mallocs and slowing things down. 614 615 Contributed by: Mathew Knepley 616 */ 617 localdiag = PETSC_FALSE; 618 if (diag && (l->A->M == l->A->N)) { 619 localdiag = PETSC_TRUE; 620 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 621 } else { 622 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 623 } 624 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 625 ierr = ISDestroy(istmp); CHKERRQ(ierr); 626 627 if (diag && (localdiag == PETSC_FALSE)) { 628 for ( i = 0; i < slen; i++ ) { 629 row = lrows[i] + rstart; 630 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 631 } 632 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 633 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 634 } 635 636 if (diag) { 637 for ( i = 0; i < slen; i++ ) { 638 row = lrows[i] + rstart; 639 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 640 } 641 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 642 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 643 } 644 PetscFree(lrows); 645 646 /* wait on sends */ 647 if (nsends) { 648 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 649 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 650 PetscFree(send_status); 651 } 652 PetscFree(send_waits); PetscFree(svalues); 653 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNC__ 658 #define __FUNC__ "MatMult_MPIAIJ" 659 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 660 { 661 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 662 int ierr,nt; 663 664 PetscFunctionBegin; 665 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 666 if (nt != a->n) { 667 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 668 } 669 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 670 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 671 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 672 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } 675 676 #undef __FUNC__ 677 #define __FUNC__ "MatMultAdd_MPIAIJ" 678 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 679 { 680 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 681 int ierr; 682 683 PetscFunctionBegin; 684 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 685 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 686 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 687 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNC__ 692 #define __FUNC__ "MatMultTrans_MPIAIJ" 693 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 696 int ierr; 697 698 PetscFunctionBegin; 699 /* do nondiagonal part */ 700 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 701 /* send it on its way */ 702 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 703 /* do local part */ 704 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 705 /* receive remote parts: note this assumes the values are not actually */ 706 /* inserted in yy until the next line, which is true for my implementation*/ 707 /* but is not perhaps always true. */ 708 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 714 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 717 int ierr; 718 719 PetscFunctionBegin; 720 /* do nondiagonal part */ 721 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 722 /* send it on its way */ 723 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 724 /* do local part */ 725 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 726 /* receive remote parts: note this assumes the values are not actually */ 727 /* inserted in yy until the next line, which is true for my implementation*/ 728 /* but is not perhaps always true. */ 729 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 730 PetscFunctionReturn(0); 731 } 732 733 /* 734 This only works correctly for square matrices where the subblock A->A is the 735 diagonal block 736 */ 737 #undef __FUNC__ 738 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 739 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 740 { 741 int ierr; 742 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 743 744 PetscFunctionBegin; 745 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 746 if (a->rstart != a->cstart || a->rend != a->cend) { 747 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 748 } 749 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNC__ 754 #define __FUNC__ "MatScale_MPIAIJ" 755 int MatScale_MPIAIJ(Scalar *aa,Mat A) 756 { 757 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 758 int ierr; 759 760 PetscFunctionBegin; 761 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 762 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNC__ 767 #define __FUNC__ "MatDestroy_MPIAIJ" 768 int MatDestroy_MPIAIJ(Mat mat) 769 { 770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 771 int ierr; 772 773 PetscFunctionBegin; 774 if (--mat->refct > 0) PetscFunctionReturn(0); 775 776 if (mat->mapping) { 777 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 778 } 779 if (mat->bmapping) { 780 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 781 } 782 if (mat->rmap) { 783 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 784 } 785 if (mat->cmap) { 786 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 787 } 788 #if defined(USE_PETSC_LOG) 789 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 790 #endif 791 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 792 PetscFree(aij->rowners); 793 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 794 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 795 if (aij->colmap) PetscFree(aij->colmap); 796 if (aij->garray) PetscFree(aij->garray); 797 if (aij->lvec) VecDestroy(aij->lvec); 798 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 799 if (aij->rowvalues) PetscFree(aij->rowvalues); 800 PetscFree(aij); 801 PLogObjectDestroy(mat); 802 PetscHeaderDestroy(mat); 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNC__ 807 #define __FUNC__ "MatView_MPIAIJ_Binary" 808 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 809 { 810 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 811 int ierr; 812 813 PetscFunctionBegin; 814 if (aij->size == 1) { 815 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 816 } 817 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 818 PetscFunctionReturn(0); 819 } 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 823 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 824 { 825 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 826 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 827 int ierr, format,shift = C->indexshift,rank; 828 FILE *fd; 829 ViewerType vtype; 830 831 PetscFunctionBegin; 832 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 833 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 834 ierr = ViewerGetFormat(viewer,&format); 835 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 836 MatInfo info; 837 int flg; 838 MPI_Comm_rank(mat->comm,&rank); 839 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 840 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 841 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 842 PetscSequentialPhaseBegin(mat->comm,1); 843 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 844 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 845 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 846 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 847 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 848 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 849 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 850 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 851 fflush(fd); 852 PetscSequentialPhaseEnd(mat->comm,1); 853 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 856 PetscFunctionReturn(0); 857 } 858 } 859 860 if (vtype == DRAW_VIEWER) { 861 Draw draw; 862 PetscTruth isnull; 863 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 864 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 865 } 866 867 if (vtype == ASCII_FILE_VIEWER) { 868 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 869 PetscSequentialPhaseBegin(mat->comm,1); 870 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 871 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 872 aij->cend); 873 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 874 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 875 fflush(fd); 876 PetscSequentialPhaseEnd(mat->comm,1); 877 } else { 878 int size = aij->size; 879 rank = aij->rank; 880 if (size == 1) { 881 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 882 } else { 883 /* assemble the entire matrix onto first processor. */ 884 Mat A; 885 Mat_SeqAIJ *Aloc; 886 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 887 Scalar *a; 888 889 if (!rank) { 890 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 891 } else { 892 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 893 } 894 PLogObjectParent(mat,A); 895 896 /* copy over the A part */ 897 Aloc = (Mat_SeqAIJ*) aij->A->data; 898 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 899 row = aij->rstart; 900 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 901 for ( i=0; i<m; i++ ) { 902 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 903 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 904 } 905 aj = Aloc->j; 906 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 907 908 /* copy over the B part */ 909 Aloc = (Mat_SeqAIJ*) aij->B->data; 910 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 911 row = aij->rstart; 912 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 913 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 914 for ( i=0; i<m; i++ ) { 915 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 916 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 917 } 918 PetscFree(ct); 919 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 920 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 921 /* 922 Everyone has to call to draw the matrix since the graphics waits are 923 synchronized across all processors that share the Draw object 924 */ 925 if (!rank || vtype == DRAW_VIEWER) { 926 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 927 } 928 ierr = MatDestroy(A); CHKERRQ(ierr); 929 } 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNC__ 935 #define __FUNC__ "MatView_MPIAIJ" 936 int MatView_MPIAIJ(Mat mat,Viewer viewer) 937 { 938 int ierr; 939 ViewerType vtype; 940 941 PetscFunctionBegin; 942 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 943 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 944 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 945 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 946 } else if (vtype == BINARY_FILE_VIEWER) { 947 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 948 } else { 949 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 950 } 951 PetscFunctionReturn(0); 952 } 953 954 /* 955 This has to provide several versions. 956 957 2) a) use only local smoothing updating outer values only once. 958 b) local smoothing updating outer values each inner iteration 959 3) color updating out values betwen colors. 960 */ 961 #undef __FUNC__ 962 #define __FUNC__ "MatRelax_MPIAIJ" 963 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 964 double fshift,int its,Vec xx) 965 { 966 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 967 Mat AA = mat->A, BB = mat->B; 968 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 969 Scalar *b,*x,*xs,*ls,d,*v,sum; 970 int ierr,*idx, *diag; 971 int n = mat->n, m = mat->m, i,shift = A->indexshift; 972 973 PetscFunctionBegin; 974 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 975 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 976 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 977 xs = x + shift; /* shift by one for index start of 1 */ 978 ls = ls + shift; 979 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 980 diag = A->diag; 981 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 982 if (flag & SOR_ZERO_INITIAL_GUESS) { 983 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 984 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 985 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 986 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 990 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 991 while (its--) { 992 /* go down through the rows */ 993 for ( i=0; i<m; i++ ) { 994 n = A->i[i+1] - A->i[i]; 995 PLogFlops(4*n+3); 996 idx = A->j + A->i[i] + shift; 997 v = A->a + A->i[i] + shift; 998 sum = b[i]; 999 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000 d = fshift + A->a[diag[i]+shift]; 1001 n = B->i[i+1] - B->i[i]; 1002 idx = B->j + B->i[i] + shift; 1003 v = B->a + B->i[i] + shift; 1004 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1005 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1006 } 1007 /* come up through the rows */ 1008 for ( i=m-1; i>-1; i-- ) { 1009 n = A->i[i+1] - A->i[i]; 1010 PLogFlops(4*n+3) 1011 idx = A->j + A->i[i] + shift; 1012 v = A->a + A->i[i] + shift; 1013 sum = b[i]; 1014 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1015 d = fshift + A->a[diag[i]+shift]; 1016 n = B->i[i+1] - B->i[i]; 1017 idx = B->j + B->i[i] + shift; 1018 v = B->a + B->i[i] + shift; 1019 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1020 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1021 } 1022 } 1023 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1024 if (flag & SOR_ZERO_INITIAL_GUESS) { 1025 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1026 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1027 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1028 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1032 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1033 while (its--) { 1034 for ( i=0; i<m; i++ ) { 1035 n = A->i[i+1] - A->i[i]; 1036 PLogFlops(4*n+3); 1037 idx = A->j + A->i[i] + shift; 1038 v = A->a + A->i[i] + shift; 1039 sum = b[i]; 1040 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1041 d = fshift + A->a[diag[i]+shift]; 1042 n = B->i[i+1] - B->i[i]; 1043 idx = B->j + B->i[i] + shift; 1044 v = B->a + B->i[i] + shift; 1045 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1046 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1047 } 1048 } 1049 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1050 if (flag & SOR_ZERO_INITIAL_GUESS) { 1051 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1052 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1053 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1054 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 while (its--) { 1060 for ( i=m-1; i>-1; i-- ) { 1061 n = A->i[i+1] - A->i[i]; 1062 PLogFlops(4*n+3); 1063 idx = A->j + A->i[i] + shift; 1064 v = A->a + A->i[i] + shift; 1065 sum = b[i]; 1066 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1067 d = fshift + A->a[diag[i]+shift]; 1068 n = B->i[i+1] - B->i[i]; 1069 idx = B->j + B->i[i] + shift; 1070 v = B->a + B->i[i] + shift; 1071 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1072 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1073 } 1074 } 1075 } else { 1076 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1077 } 1078 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1079 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1080 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNC__ 1085 #define __FUNC__ "MatGetInfo_MPIAIJ" 1086 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1087 { 1088 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1089 Mat A = mat->A, B = mat->B; 1090 int ierr; 1091 double isend[5], irecv[5]; 1092 1093 PetscFunctionBegin; 1094 info->block_size = 1.0; 1095 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1096 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1097 isend[3] = info->memory; isend[4] = info->mallocs; 1098 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1099 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1100 isend[3] += info->memory; isend[4] += info->mallocs; 1101 if (flag == MAT_LOCAL) { 1102 info->nz_used = isend[0]; 1103 info->nz_allocated = isend[1]; 1104 info->nz_unneeded = isend[2]; 1105 info->memory = isend[3]; 1106 info->mallocs = isend[4]; 1107 } else if (flag == MAT_GLOBAL_MAX) { 1108 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1109 info->nz_used = irecv[0]; 1110 info->nz_allocated = irecv[1]; 1111 info->nz_unneeded = irecv[2]; 1112 info->memory = irecv[3]; 1113 info->mallocs = irecv[4]; 1114 } else if (flag == MAT_GLOBAL_SUM) { 1115 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1116 info->nz_used = irecv[0]; 1117 info->nz_allocated = irecv[1]; 1118 info->nz_unneeded = irecv[2]; 1119 info->memory = irecv[3]; 1120 info->mallocs = irecv[4]; 1121 } 1122 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1123 info->fill_ratio_needed = 0; 1124 info->factor_mallocs = 0; 1125 info->rows_global = (double)mat->M; 1126 info->columns_global = (double)mat->N; 1127 info->rows_local = (double)mat->m; 1128 info->columns_local = (double)mat->N; 1129 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNC__ 1134 #define __FUNC__ "MatSetOption_MPIAIJ" 1135 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1136 { 1137 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1138 1139 PetscFunctionBegin; 1140 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1141 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1142 op == MAT_COLUMNS_UNSORTED || 1143 op == MAT_COLUMNS_SORTED || 1144 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1145 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1146 MatSetOption(a->A,op); 1147 MatSetOption(a->B,op); 1148 } else if (op == MAT_ROW_ORIENTED) { 1149 a->roworiented = 1; 1150 MatSetOption(a->A,op); 1151 MatSetOption(a->B,op); 1152 } else if (op == MAT_ROWS_SORTED || 1153 op == MAT_ROWS_UNSORTED || 1154 op == MAT_SYMMETRIC || 1155 op == MAT_STRUCTURALLY_SYMMETRIC || 1156 op == MAT_YES_NEW_DIAGONALS) 1157 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1158 else if (op == MAT_COLUMN_ORIENTED) { 1159 a->roworiented = 0; 1160 MatSetOption(a->A,op); 1161 MatSetOption(a->B,op); 1162 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1163 a->donotstash = 1; 1164 } else if (op == MAT_NO_NEW_DIAGONALS){ 1165 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1166 } else { 1167 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNC__ 1173 #define __FUNC__ "MatGetSize_MPIAIJ" 1174 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1175 { 1176 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1177 1178 PetscFunctionBegin; 1179 if (m) *m = mat->M; 1180 if (n) *n = mat->N; 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNC__ 1185 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1186 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1187 { 1188 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1189 1190 PetscFunctionBegin; 1191 if (m) *m = mat->m; 1192 if (n) *n = mat->n; 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNC__ 1197 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1198 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1199 { 1200 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1201 1202 PetscFunctionBegin; 1203 *m = mat->rstart; *n = mat->rend; 1204 PetscFunctionReturn(0); 1205 } 1206 1207 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1208 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1209 1210 #undef __FUNC__ 1211 #define __FUNC__ "MatGetRow_MPIAIJ" 1212 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1213 { 1214 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1215 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1216 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1217 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1218 int *cmap, *idx_p; 1219 1220 PetscFunctionBegin; 1221 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1222 mat->getrowactive = PETSC_TRUE; 1223 1224 if (!mat->rowvalues && (idx || v)) { 1225 /* 1226 allocate enough space to hold information from the longest row. 1227 */ 1228 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1229 int max = 1,m = mat->m,tmp; 1230 for ( i=0; i<m; i++ ) { 1231 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1232 if (max < tmp) { max = tmp; } 1233 } 1234 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1235 mat->rowindices = (int *) (mat->rowvalues + max); 1236 } 1237 1238 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1239 lrow = row - rstart; 1240 1241 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1242 if (!v) {pvA = 0; pvB = 0;} 1243 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1244 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1245 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1246 nztot = nzA + nzB; 1247 1248 cmap = mat->garray; 1249 if (v || idx) { 1250 if (nztot) { 1251 /* Sort by increasing column numbers, assuming A and B already sorted */ 1252 int imark = -1; 1253 if (v) { 1254 *v = v_p = mat->rowvalues; 1255 for ( i=0; i<nzB; i++ ) { 1256 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1257 else break; 1258 } 1259 imark = i; 1260 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1261 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1262 } 1263 if (idx) { 1264 *idx = idx_p = mat->rowindices; 1265 if (imark > -1) { 1266 for ( i=0; i<imark; i++ ) { 1267 idx_p[i] = cmap[cworkB[i]]; 1268 } 1269 } else { 1270 for ( i=0; i<nzB; i++ ) { 1271 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1272 else break; 1273 } 1274 imark = i; 1275 } 1276 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1277 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1278 } 1279 } else { 1280 if (idx) *idx = 0; 1281 if (v) *v = 0; 1282 } 1283 } 1284 *nz = nztot; 1285 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1286 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 #undef __FUNC__ 1291 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1292 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1293 { 1294 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1295 1296 PetscFunctionBegin; 1297 if (aij->getrowactive == PETSC_FALSE) { 1298 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1299 } 1300 aij->getrowactive = PETSC_FALSE; 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNC__ 1305 #define __FUNC__ "MatNorm_MPIAIJ" 1306 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1307 { 1308 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1309 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1310 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1311 double sum = 0.0; 1312 Scalar *v; 1313 1314 PetscFunctionBegin; 1315 if (aij->size == 1) { 1316 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1317 } else { 1318 if (type == NORM_FROBENIUS) { 1319 v = amat->a; 1320 for (i=0; i<amat->nz; i++ ) { 1321 #if defined(USE_PETSC_COMPLEX) 1322 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1323 #else 1324 sum += (*v)*(*v); v++; 1325 #endif 1326 } 1327 v = bmat->a; 1328 for (i=0; i<bmat->nz; i++ ) { 1329 #if defined(USE_PETSC_COMPLEX) 1330 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1331 #else 1332 sum += (*v)*(*v); v++; 1333 #endif 1334 } 1335 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1336 *norm = sqrt(*norm); 1337 } else if (type == NORM_1) { /* max column norm */ 1338 double *tmp, *tmp2; 1339 int *jj, *garray = aij->garray; 1340 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1341 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1342 PetscMemzero(tmp,aij->N*sizeof(double)); 1343 *norm = 0.0; 1344 v = amat->a; jj = amat->j; 1345 for ( j=0; j<amat->nz; j++ ) { 1346 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1347 } 1348 v = bmat->a; jj = bmat->j; 1349 for ( j=0; j<bmat->nz; j++ ) { 1350 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1351 } 1352 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1353 for ( j=0; j<aij->N; j++ ) { 1354 if (tmp2[j] > *norm) *norm = tmp2[j]; 1355 } 1356 PetscFree(tmp); PetscFree(tmp2); 1357 } else if (type == NORM_INFINITY) { /* max row norm */ 1358 double ntemp = 0.0; 1359 for ( j=0; j<amat->m; j++ ) { 1360 v = amat->a + amat->i[j] + shift; 1361 sum = 0.0; 1362 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1363 sum += PetscAbsScalar(*v); v++; 1364 } 1365 v = bmat->a + bmat->i[j] + shift; 1366 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1367 sum += PetscAbsScalar(*v); v++; 1368 } 1369 if (sum > ntemp) ntemp = sum; 1370 } 1371 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1372 } else { 1373 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1374 } 1375 } 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #undef __FUNC__ 1380 #define __FUNC__ "MatTranspose_MPIAIJ" 1381 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1382 { 1383 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1384 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1385 int ierr,shift = Aloc->indexshift; 1386 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1387 Mat B; 1388 Scalar *array; 1389 1390 PetscFunctionBegin; 1391 if (matout == PETSC_NULL && M != N) { 1392 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1393 } 1394 1395 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1396 1397 /* copy over the A part */ 1398 Aloc = (Mat_SeqAIJ*) a->A->data; 1399 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1400 row = a->rstart; 1401 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1402 for ( i=0; i<m; i++ ) { 1403 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1404 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1405 } 1406 aj = Aloc->j; 1407 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1408 1409 /* copy over the B part */ 1410 Aloc = (Mat_SeqAIJ*) a->B->data; 1411 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1412 row = a->rstart; 1413 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1414 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1415 for ( i=0; i<m; i++ ) { 1416 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1417 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1418 } 1419 PetscFree(ct); 1420 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1421 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1422 if (matout != PETSC_NULL) { 1423 *matout = B; 1424 } else { 1425 PetscOps *Abops; 1426 struct _MatOps *Aops; 1427 1428 /* This isn't really an in-place transpose .... but free data structures from a */ 1429 PetscFree(a->rowners); 1430 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1431 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1432 if (a->colmap) PetscFree(a->colmap); 1433 if (a->garray) PetscFree(a->garray); 1434 if (a->lvec) VecDestroy(a->lvec); 1435 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1436 PetscFree(a); 1437 1438 /* 1439 This is horrible, horrible code. We need to keep the 1440 A pointers for the bops and ops but copy everything 1441 else from C. 1442 */ 1443 Abops = A->bops; 1444 Aops = A->ops; 1445 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1446 A->bops = Abops; 1447 A->ops = Aops; 1448 PetscHeaderDestroy(B); 1449 } 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNC__ 1454 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1455 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1456 { 1457 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1458 Mat a = aij->A, b = aij->B; 1459 int ierr,s1,s2,s3; 1460 1461 PetscFunctionBegin; 1462 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1463 if (rr) { 1464 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1465 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1466 /* Overlap communication with computation. */ 1467 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1468 } 1469 if (ll) { 1470 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1471 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1472 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1473 } 1474 /* scale the diagonal block */ 1475 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1476 1477 if (rr) { 1478 /* Do a scatter end and then right scale the off-diagonal block */ 1479 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1480 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1481 } 1482 1483 PetscFunctionReturn(0); 1484 } 1485 1486 1487 extern int MatPrintHelp_SeqAIJ(Mat); 1488 #undef __FUNC__ 1489 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1490 int MatPrintHelp_MPIAIJ(Mat A) 1491 { 1492 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1493 int ierr; 1494 1495 PetscFunctionBegin; 1496 if (!a->rank) { 1497 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1498 } 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNC__ 1503 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1504 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1505 { 1506 PetscFunctionBegin; 1507 *bs = 1; 1508 PetscFunctionReturn(0); 1509 } 1510 #undef __FUNC__ 1511 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1512 int MatSetUnfactored_MPIAIJ(Mat A) 1513 { 1514 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1515 int ierr; 1516 1517 PetscFunctionBegin; 1518 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNC__ 1523 #define __FUNC__ "MatEqual_MPIAIJ" 1524 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1525 { 1526 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1527 Mat a, b, c, d; 1528 PetscTruth flg; 1529 int ierr; 1530 1531 PetscFunctionBegin; 1532 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1533 a = matA->A; b = matA->B; 1534 c = matB->A; d = matB->B; 1535 1536 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1537 if (flg == PETSC_TRUE) { 1538 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1539 } 1540 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNC__ 1545 #define __FUNC__ "MatCopy_MPIAIJ" 1546 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1547 { 1548 int ierr; 1549 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1550 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1551 1552 PetscFunctionBegin; 1553 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1554 /* because of the column compression in the off-processor part of the matrix a->B, 1555 the number of columns in a->B and b->B may be different, hence we cannot call 1556 the MatCopy() directly on the two parts. If need be, we can provide a more 1557 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1558 then copying the submatrices */ 1559 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1562 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1568 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1569 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1570 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1571 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1572 1573 /* -------------------------------------------------------------------*/ 1574 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1575 MatGetRow_MPIAIJ, 1576 MatRestoreRow_MPIAIJ, 1577 MatMult_MPIAIJ, 1578 MatMultAdd_MPIAIJ, 1579 MatMultTrans_MPIAIJ, 1580 MatMultTransAdd_MPIAIJ, 1581 0, 1582 0, 1583 0, 1584 0, 1585 0, 1586 0, 1587 MatRelax_MPIAIJ, 1588 MatTranspose_MPIAIJ, 1589 MatGetInfo_MPIAIJ, 1590 MatEqual_MPIAIJ, 1591 MatGetDiagonal_MPIAIJ, 1592 MatDiagonalScale_MPIAIJ, 1593 MatNorm_MPIAIJ, 1594 MatAssemblyBegin_MPIAIJ, 1595 MatAssemblyEnd_MPIAIJ, 1596 0, 1597 MatSetOption_MPIAIJ, 1598 MatZeroEntries_MPIAIJ, 1599 MatZeroRows_MPIAIJ, 1600 0, 1601 0, 1602 0, 1603 0, 1604 MatGetSize_MPIAIJ, 1605 MatGetLocalSize_MPIAIJ, 1606 MatGetOwnershipRange_MPIAIJ, 1607 0, 1608 0, 1609 0, 1610 0, 1611 MatDuplicate_MPIAIJ, 1612 0, 1613 0, 1614 0, 1615 0, 1616 0, 1617 MatGetSubMatrices_MPIAIJ, 1618 MatIncreaseOverlap_MPIAIJ, 1619 MatGetValues_MPIAIJ, 1620 MatCopy_MPIAIJ, 1621 MatPrintHelp_MPIAIJ, 1622 MatScale_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 MatGetBlockSize_MPIAIJ, 1627 0, 1628 0, 1629 0, 1630 0, 1631 MatFDColoringCreate_MPIAIJ, 1632 0, 1633 MatSetUnfactored_MPIAIJ, 1634 0, 1635 0, 1636 MatGetSubMatrix_MPIAIJ, 1637 0, 1638 0, 1639 MatGetMaps_Petsc}; 1640 1641 /* ----------------------------------------------------------------------------------------*/ 1642 1643 EXTERN_C_BEGIN 1644 #undef __FUNC__ 1645 #define __FUNC__ "MatStoreValues_MPIAIJ" 1646 int MatStoreValues_MPIAIJ(Mat mat) 1647 { 1648 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1649 int ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1653 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 EXTERN_C_END 1657 1658 EXTERN_C_BEGIN 1659 #undef __FUNC__ 1660 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1661 int MatRetrieveValues_MPIAIJ(Mat mat) 1662 { 1663 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1664 int ierr; 1665 1666 PetscFunctionBegin; 1667 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1668 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1669 PetscFunctionReturn(0); 1670 } 1671 EXTERN_C_END 1672 1673 #undef __FUNC__ 1674 #define __FUNC__ "MatCreateMPIAIJ" 1675 /*@C 1676 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1677 (the default parallel PETSc format). For good matrix assembly performance 1678 the user should preallocate the matrix storage by setting the parameters 1679 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1680 performance can be increased by more than a factor of 50. 1681 1682 Collective on MPI_Comm 1683 1684 Input Parameters: 1685 + comm - MPI communicator 1686 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1687 This value should be the same as the local size used in creating the 1688 y vector for the matrix-vector product y = Ax. 1689 . n - This value should be the same as the local size used in creating the 1690 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1691 calculated if N is given) For square matrices n is almost always m. 1692 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1693 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1694 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1695 (same value is used for all local rows) 1696 . d_nnz - array containing the number of nonzeros in the various rows of the 1697 DIAGONAL portion of the local submatrix (possibly different for each row) 1698 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1699 The size of this array is equal to the number of local rows, i.e 'm'. 1700 You must leave room for the diagonal entry even if it is zero. 1701 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1702 submatrix (same value is used for all local rows). 1703 - o_nnz - array containing the number of nonzeros in the various rows of the 1704 OFF-DIAGONAL portion of the local submatrix (possibly different for 1705 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1706 structure. The size of this array is equal to the number 1707 of local rows, i.e 'm'. 1708 1709 Output Parameter: 1710 . A - the matrix 1711 1712 Notes: 1713 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1714 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1715 storage requirements for this matrix. 1716 1717 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1718 processor than it must be used on all processors that share the object for 1719 that argument. 1720 1721 The AIJ format (also called the Yale sparse matrix format or 1722 compressed row storage), is fully compatible with standard Fortran 77 1723 storage. That is, the stored row and column indices can begin at 1724 either one (as in Fortran) or zero. See the users manual for details. 1725 1726 The user MUST specify either the local or global matrix dimensions 1727 (possibly both). 1728 1729 The parallel matrix is partitioned such that the first m0 rows belong to 1730 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1731 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1732 1733 The DIAGONAL portion of the local submatrix of a processor can be defined 1734 as the submatrix which is obtained by extraction the part corresponding 1735 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1736 first row that belongs to the processor, and r2 is the last row belonging 1737 to the this processor. This is a square mxm matrix. The remaining portion 1738 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1739 1740 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1741 1742 By default, this format uses inodes (identical nodes) when possible. 1743 We search for consecutive rows with the same nonzero structure, thereby 1744 reusing matrix information to achieve increased efficiency. 1745 1746 Options Database Keys: 1747 + -mat_aij_no_inode - Do not use inodes 1748 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1749 - -mat_aij_oneindex - Internally use indexing starting at 1 1750 rather than 0. Note that when calling MatSetValues(), 1751 the user still MUST index entries starting at 0! 1752 . -mat_mpi - use the parallel matrix data structures even on one processor 1753 (defaults to using SeqBAIJ format on one processor) 1754 . -mat_mpi - use the parallel matrix data structures even on one processor 1755 (defaults to using SeqAIJ format on one processor) 1756 1757 1758 Example usage: 1759 1760 Consider the following 8x8 matrix with 34 non-zero values, that is 1761 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1762 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1763 as follows: 1764 1765 .vb 1766 1 2 0 | 0 3 0 | 0 4 1767 Proc0 0 5 6 | 7 0 0 | 8 0 1768 9 0 10 | 11 0 0 | 12 0 1769 ------------------------------------- 1770 13 0 14 | 15 16 17 | 0 0 1771 Proc1 0 18 0 | 19 20 21 | 0 0 1772 0 0 0 | 22 23 0 | 24 0 1773 ------------------------------------- 1774 Proc2 25 26 27 | 0 0 28 | 29 0 1775 30 0 0 | 31 32 33 | 0 34 1776 .ve 1777 1778 This can be represented as a collection of submatrices as: 1779 1780 .vb 1781 A B C 1782 D E F 1783 G H I 1784 .ve 1785 1786 Where the submatrices A,B,C are owned by proc0, D,E,F are 1787 owned by proc1, G,H,I are owned by proc2. 1788 1789 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1790 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1791 The 'M','N' parameters are 8,8, and have the same values on all procs. 1792 1793 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1794 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1795 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1796 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1797 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1798 matrix, ans [DF] as another SeqAIJ matrix. 1799 1800 When d_nz, o_nz parameters are specified, d_nz storage elements are 1801 allocated for every row of the local diagonal submatrix, and o_nz 1802 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1803 One way to choose d_nz and o_nz is to use the max nonzerors per local 1804 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1805 In this case, the values of d_nz,o_nz are: 1806 .vb 1807 proc0 : dnz = 2, o_nz = 2 1808 proc1 : dnz = 3, o_nz = 2 1809 proc2 : dnz = 1, o_nz = 4 1810 .ve 1811 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1812 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1813 for proc3. i.e we are using 12+15+10=37 storage locations to store 1814 34 values. 1815 1816 When d_nnz, o_nnz parameters are specified, the storage is specified 1817 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1818 In the above case the values for d_nnz,o_nnz are: 1819 .vb 1820 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1821 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1822 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1823 .ve 1824 Here the space allocated is sum of all the above values i.e 34, and 1825 hence pre-allocation is perfect. 1826 1827 .keywords: matrix, aij, compressed row, sparse, parallel 1828 1829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1830 @*/ 1831 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1832 { 1833 Mat B; 1834 Mat_MPIAIJ *b; 1835 int ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0; 1836 1837 PetscFunctionBegin; 1838 MPI_Comm_size(comm,&size); 1839 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1840 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1841 if (!flag1 && !flag2 && size == 1) { 1842 if (M == PETSC_DECIDE) M = m; 1843 if (N == PETSC_DECIDE) N = n; 1844 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 *A = 0; 1849 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1850 PLogObjectCreate(B); 1851 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1852 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1853 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1854 B->ops->destroy = MatDestroy_MPIAIJ; 1855 B->ops->view = MatView_MPIAIJ; 1856 B->factor = 0; 1857 B->assembled = PETSC_FALSE; 1858 B->mapping = 0; 1859 1860 B->insertmode = NOT_SET_VALUES; 1861 b->size = size; 1862 MPI_Comm_rank(comm,&b->rank); 1863 1864 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1865 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1866 } 1867 1868 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1869 work[0] = m; work[1] = n; 1870 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1871 if (M == PETSC_DECIDE) M = sum[0]; 1872 if (N == PETSC_DECIDE) N = sum[1]; 1873 } 1874 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1875 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1876 b->m = m; B->m = m; 1877 b->n = n; B->n = n; 1878 b->N = N; B->N = N; 1879 b->M = M; B->M = M; 1880 1881 /* the information in the maps duplicates the information computed below, eventually 1882 we should remove the duplicate information that is not contained in the maps */ 1883 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1884 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1885 1886 /* build local table of row and column ownerships */ 1887 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1888 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1889 b->cowners = b->rowners + b->size + 2; 1890 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1891 b->rowners[0] = 0; 1892 for ( i=2; i<=b->size; i++ ) { 1893 b->rowners[i] += b->rowners[i-1]; 1894 } 1895 b->rstart = b->rowners[b->rank]; 1896 b->rend = b->rowners[b->rank+1]; 1897 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1898 b->cowners[0] = 0; 1899 for ( i=2; i<=b->size; i++ ) { 1900 b->cowners[i] += b->cowners[i-1]; 1901 } 1902 b->cstart = b->cowners[b->rank]; 1903 b->cend = b->cowners[b->rank+1]; 1904 1905 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1906 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1907 PLogObjectParent(B,b->A); 1908 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1909 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1910 PLogObjectParent(B,b->B); 1911 1912 /* build cache for off array entries formed */ 1913 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1914 b->donotstash = 0; 1915 b->colmap = 0; 1916 b->garray = 0; 1917 b->roworiented = 1; 1918 1919 /* stuff used for matrix vector multiply */ 1920 b->lvec = 0; 1921 b->Mvctx = 0; 1922 1923 /* stuff for MatGetRow() */ 1924 b->rowindices = 0; 1925 b->rowvalues = 0; 1926 b->getrowactive = PETSC_FALSE; 1927 1928 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1929 "MatStoreValues_MPIAIJ", 1930 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1931 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1932 "MatRetrieveValues_MPIAIJ", 1933 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1934 *A = B; 1935 PetscFunctionReturn(0); 1936 } 1937 1938 #undef __FUNC__ 1939 #define __FUNC__ "MatDuplicate_MPIAIJ" 1940 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1941 { 1942 Mat mat; 1943 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1944 int ierr, len=0, flg; 1945 1946 PetscFunctionBegin; 1947 *newmat = 0; 1948 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1949 PLogObjectCreate(mat); 1950 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1951 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1952 mat->ops->destroy = MatDestroy_MPIAIJ; 1953 mat->ops->view = MatView_MPIAIJ; 1954 mat->factor = matin->factor; 1955 mat->assembled = PETSC_TRUE; 1956 1957 a->m = mat->m = oldmat->m; 1958 a->n = mat->n = oldmat->n; 1959 a->M = mat->M = oldmat->M; 1960 a->N = mat->N = oldmat->N; 1961 1962 a->rstart = oldmat->rstart; 1963 a->rend = oldmat->rend; 1964 a->cstart = oldmat->cstart; 1965 a->cend = oldmat->cend; 1966 a->size = oldmat->size; 1967 a->rank = oldmat->rank; 1968 mat->insertmode = NOT_SET_VALUES; 1969 a->rowvalues = 0; 1970 a->getrowactive = PETSC_FALSE; 1971 1972 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1973 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1974 a->cowners = a->rowners + a->size + 2; 1975 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1976 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1977 if (oldmat->colmap) { 1978 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1979 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1980 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1981 } else a->colmap = 0; 1982 if (oldmat->garray) { 1983 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1984 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1985 PLogObjectMemory(mat,len*sizeof(int)); 1986 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1987 } else a->garray = 0; 1988 1989 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1990 PLogObjectParent(mat,a->lvec); 1991 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1992 PLogObjectParent(mat,a->Mvctx); 1993 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 1994 PLogObjectParent(mat,a->A); 1995 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 1996 PLogObjectParent(mat,a->B); 1997 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1998 if (flg) { 1999 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2000 } 2001 *newmat = mat; 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #include "sys.h" 2006 2007 #undef __FUNC__ 2008 #define __FUNC__ "MatLoad_MPIAIJ" 2009 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2010 { 2011 Mat A; 2012 Scalar *vals,*svals; 2013 MPI_Comm comm = ((PetscObject)viewer)->comm; 2014 MPI_Status status; 2015 int i, nz, ierr, j,rstart, rend, fd; 2016 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2017 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2018 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2019 2020 PetscFunctionBegin; 2021 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2022 if (!rank) { 2023 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2024 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2025 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2026 if (header[3] < 0) { 2027 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2028 } 2029 } 2030 2031 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2032 M = header[1]; N = header[2]; 2033 /* determine ownership of all rows */ 2034 m = M/size + ((M % size) > rank); 2035 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2036 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2037 rowners[0] = 0; 2038 for ( i=2; i<=size; i++ ) { 2039 rowners[i] += rowners[i-1]; 2040 } 2041 rstart = rowners[rank]; 2042 rend = rowners[rank+1]; 2043 2044 /* distribute row lengths to all processors */ 2045 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2046 offlens = ourlens + (rend-rstart); 2047 if (!rank) { 2048 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2049 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2050 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2051 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2052 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2053 PetscFree(sndcounts); 2054 } else { 2055 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2056 } 2057 2058 if (!rank) { 2059 /* calculate the number of nonzeros on each processor */ 2060 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2061 PetscMemzero(procsnz,size*sizeof(int)); 2062 for ( i=0; i<size; i++ ) { 2063 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2064 procsnz[i] += rowlengths[j]; 2065 } 2066 } 2067 PetscFree(rowlengths); 2068 2069 /* determine max buffer needed and allocate it */ 2070 maxnz = 0; 2071 for ( i=0; i<size; i++ ) { 2072 maxnz = PetscMax(maxnz,procsnz[i]); 2073 } 2074 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2075 2076 /* read in my part of the matrix column indices */ 2077 nz = procsnz[0]; 2078 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2079 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2080 2081 /* read in every one elses and ship off */ 2082 for ( i=1; i<size; i++ ) { 2083 nz = procsnz[i]; 2084 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2085 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2086 } 2087 PetscFree(cols); 2088 } else { 2089 /* determine buffer space needed for message */ 2090 nz = 0; 2091 for ( i=0; i<m; i++ ) { 2092 nz += ourlens[i]; 2093 } 2094 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2095 2096 /* receive message of column indices*/ 2097 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2098 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2099 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2100 } 2101 2102 /* determine column ownership if matrix is not square */ 2103 if (N != M) { 2104 n = N/size + ((N % size) > rank); 2105 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2106 cstart = cend - n; 2107 } else { 2108 cstart = rstart; 2109 cend = rend; 2110 n = cend - cstart; 2111 } 2112 2113 /* loop over local rows, determining number of off diagonal entries */ 2114 PetscMemzero(offlens,m*sizeof(int)); 2115 jj = 0; 2116 for ( i=0; i<m; i++ ) { 2117 for ( j=0; j<ourlens[i]; j++ ) { 2118 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2119 jj++; 2120 } 2121 } 2122 2123 /* create our matrix */ 2124 for ( i=0; i<m; i++ ) { 2125 ourlens[i] -= offlens[i]; 2126 } 2127 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2128 A = *newmat; 2129 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2130 for ( i=0; i<m; i++ ) { 2131 ourlens[i] += offlens[i]; 2132 } 2133 2134 if (!rank) { 2135 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2136 2137 /* read in my part of the matrix numerical values */ 2138 nz = procsnz[0]; 2139 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2140 2141 /* insert into matrix */ 2142 jj = rstart; 2143 smycols = mycols; 2144 svals = vals; 2145 for ( i=0; i<m; i++ ) { 2146 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2147 smycols += ourlens[i]; 2148 svals += ourlens[i]; 2149 jj++; 2150 } 2151 2152 /* read in other processors and ship out */ 2153 for ( i=1; i<size; i++ ) { 2154 nz = procsnz[i]; 2155 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2156 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2157 } 2158 PetscFree(procsnz); 2159 } else { 2160 /* receive numeric values */ 2161 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2162 2163 /* receive message of values*/ 2164 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2165 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2166 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2167 2168 /* insert into matrix */ 2169 jj = rstart; 2170 smycols = mycols; 2171 svals = vals; 2172 for ( i=0; i<m; i++ ) { 2173 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2174 smycols += ourlens[i]; 2175 svals += ourlens[i]; 2176 jj++; 2177 } 2178 } 2179 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2180 2181 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2182 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNC__ 2187 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2188 /* 2189 Not great since it makes two copies of the submatrix, first an SeqAIJ 2190 in local and then by concatenating the local matrices the end result. 2191 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2192 */ 2193 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 2194 { 2195 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2196 Mat *local,M, Mreuse; 2197 Scalar *vwork,*aa; 2198 MPI_Comm comm = mat->comm; 2199 Mat_SeqAIJ *aij; 2200 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2201 2202 PetscFunctionBegin; 2203 MPI_Comm_rank(comm,&rank); 2204 MPI_Comm_size(comm,&size); 2205 2206 if (call == MAT_REUSE_MATRIX) { 2207 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2208 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2209 local = &Mreuse; 2210 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2211 } else { 2212 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2213 Mreuse = *local; 2214 PetscFree(local); 2215 } 2216 2217 /* 2218 m - number of local rows 2219 n - number of columns (same on all processors) 2220 rstart - first row in new global matrix generated 2221 */ 2222 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2223 if (call == MAT_INITIAL_MATRIX) { 2224 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2225 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2226 ii = aij->i; 2227 jj = aij->j; 2228 2229 /* 2230 Determine the number of non-zeros in the diagonal and off-diagonal 2231 portions of the matrix in order to do correct preallocation 2232 */ 2233 2234 /* first get start and end of "diagonal" columns */ 2235 if (csize == PETSC_DECIDE) { 2236 nlocal = n/size + ((n % size) > rank); 2237 } else { 2238 nlocal = csize; 2239 } 2240 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2241 rstart = rend - nlocal; 2242 if (rank == size - 1 && rend != n) { 2243 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2244 } 2245 2246 /* next, compute all the lengths */ 2247 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2248 olens = dlens + m; 2249 for ( i=0; i<m; i++ ) { 2250 jend = ii[i+1] - ii[i]; 2251 olen = 0; 2252 dlen = 0; 2253 for ( j=0; j<jend; j++ ) { 2254 if ( *jj < rstart || *jj >= rend) olen++; 2255 else dlen++; 2256 jj++; 2257 } 2258 olens[i] = olen; 2259 dlens[i] = dlen; 2260 } 2261 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2262 PetscFree(dlens); 2263 } else { 2264 int ml,nl; 2265 2266 M = *newmat; 2267 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2268 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2269 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2270 /* 2271 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2272 rather than the slower MatSetValues(). 2273 */ 2274 M->was_assembled = PETSC_TRUE; 2275 M->assembled = PETSC_FALSE; 2276 } 2277 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2278 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2279 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2280 ii = aij->i; 2281 jj = aij->j; 2282 aa = aij->a; 2283 for (i=0; i<m; i++) { 2284 row = rstart + i; 2285 nz = ii[i+1] - ii[i]; 2286 cwork = jj; jj += nz; 2287 vwork = aa; aa += nz; 2288 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2289 } 2290 2291 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2292 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2293 *newmat = M; 2294 2295 /* save submatrix used in processor for next request */ 2296 if (call == MAT_INITIAL_MATRIX) { 2297 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2298 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2299 } 2300 2301 PetscFunctionReturn(0); 2302 } 2303