#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.271 1999/01/08 21:30:25 balay Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix. This is done in a non scable way since the 
   length of colmap equals the global matrix length. 
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
/*
   Builds aij->colmap: for each global column that appears in the
   off-diagonal block B, colmap[global] = local+1 (0 means "absent").
   With USE_CTABLE a hash table is used instead of a dense array,
   avoiding the O(N) storage noted above.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;   /* n = number of columns actually stored in B */

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* keys and values are stored shifted by +1 so that 0 can mean "not found" */
  ierr = TableCreate( &aij->colmap, aij->n/5 ); CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = TableAdd( aij->colmap, aij->garray[i] + 1, i+1 );CHKERRQ(ierr);
  }
#else
  /* dense map over all aij->N global columns; zeroed so missing columns map to 0 */
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE   15
/*
   Inlined insertion of one value into the "A" (diagonal) SeqAIJ block.
   Expects the caller to provide these locals: rp,ap,rmax,nrow,col1,low,
   high,t,_i,ii,N,nonew,shift and the block arrays aa,ai,aj,aimax,ailen.
   NOTE(review): the binary-search probe compares rp[t] > col while the
   linear scan below compares against col1 = col - shift; with a nonzero
   indexshift these differ by one -- confirm intended (the search result
   low/high is not actually used by the scan, so this is benign here).
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
      for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value; \
          else                    ap[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix");
 \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space: one buffer holding values, column indices and row offsets */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots, opening a CHUNKSIZE gap after this row */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

/*
   Identical insertion logic for the "B" (off-diagonal) SeqAIJ block;
   kept as a textual twin of the A macro above (same caller-provided
   locals, with b/bi/bj/ba/bimax/bilen in place of the a-arrays).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
      for ( _i=0; _i<nrow; _i++ ) { \
        if (rp[_i] > col1) break; \
        if (rp[_i] == col1) { \
          if (addv == ADD_VALUES) ap[_i] += value; \
          else                    ap[_i] = value; \
          goto b_noinsert; \
        } \
      } \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                        len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds values into the parallel AIJ matrix.  Locally owned rows
   go directly into the A (diagonal) or B (off-diagonal) sequential
   block via the inlined macros above; rows owned by other processors
   are stashed for communication during assembly (unless donotstash).
   NOTE(review): nonew and shift are read once from the A block and then
   used for the B block as well -- presumably A and B always share these
   settings; confirm.
*/
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* row is owned by this processor */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block A */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative
column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* after a previous assembly B is compacted: translate the
               global column to its local index via colmap */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            col = TableFind( aij->colmap, in[j] + 1 ) - 1;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not present in B: expand B back to global width */
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      /* row owned elsewhere: stash for communication at assembly time */
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   Retrieves entries of the parallel matrix into v (row-major, m x n).
   Only locally owned rows are supported; off-diagonal columns that are
   not stored in B are returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* entry lives in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          col = TableFind( aij->colmap, idxn[j] + 1 ) - 1;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B => entry is a structural zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts parallel assembly: agrees on the insert mode across the
   communicator, then posts nonblocking sends/receives that move the
   stashed off-processor values to their owners.  Matching waits and
   unpacking happen in MatAssemblyEnd_MPIAIJ().
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (aij->donotstash) {
    /* nothing was stashed; record empty communication state */
    aij->svalues    = 0; aij->rvalues    = 0;
    aij->nsends     = 0; aij->nrecvs     = 0;
    aij->send_waits = 0; aij->recv_waits = 0;
    aij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *)
PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  /* linear ownership search per stashed entry (rows are partitioned by owners[]) */
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

     This could be done better.
  */
  /* each entry travels as a triple of Scalars: (row, col, value) */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[], consumed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* stash the communication state for MatAssemblyEnd_MPIAIJ() */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType
mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;   /* entries arrive as (row,col,value) triples */
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        /* diagonal block */
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        /* off-diagonal block; translate global column if B is compacted */
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          col = TableFind( aij->colmap, col + 1 ) - 1;
#else
          col = aij->colmap[col] - 1;
#endif
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            /* new nonzero location: expand B back to global width */
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  if (aij->recv_waits) PetscFree(aij->recv_waits);
  if (aij->rvalues) PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (aij->send_waits) PetscFree(aij->send_waits);
  if (aij->svalues) PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* builds lvec / Mvctx scatter used by the mult routines */
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/* Zeroes all stored entries by zeroing both sequential blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the (possibly non-local) rows listed in is; if diag is non-null
   the diagonal entry of each zeroed row is set to *diag.  Row indices
   are exchanged so each processor zeroes only rows it owns.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);
CHKERRQ(ierr); 537 538 /* first count number of contributors to each processor */ 539 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 540 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 541 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 542 for ( i=0; i<N; i++ ) { 543 idx = rows[i]; 544 found = 0; 545 for ( j=0; j<size; j++ ) { 546 if (idx >= owners[j] && idx < owners[j+1]) { 547 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 548 } 549 } 550 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 551 } 552 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 553 554 /* inform other processors of number of messages and max length*/ 555 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 556 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 557 nrecvs = work[rank]; 558 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 559 nmax = work[rank]; 560 PetscFree(work); 561 562 /* post receives: */ 563 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 564 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 565 for ( i=0; i<nrecvs; i++ ) { 566 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 567 } 568 569 /* do sends: 570 1) starts[i] gives the starting index in svalues for stuff going to 571 the ith processor 572 */ 573 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 574 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 575 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 576 starts[0] = 0; 577 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 578 for ( i=0; i<N; i++ ) { 579 svalues[starts[owner[i]]++] = rows[i]; 580 } 581 ISRestoreIndices(is,&rows); 582 583 starts[0] = 0; 584 for ( i=1; 
i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 585 count = 0; 586 for ( i=0; i<size; i++ ) { 587 if (procs[i]) { 588 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 589 } 590 } 591 PetscFree(starts); 592 593 base = owners[rank]; 594 595 /* wait on receives */ 596 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 597 source = lens + nrecvs; 598 count = nrecvs; slen = 0; 599 while (count) { 600 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 601 /* unpack receives into our local space */ 602 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 603 source[imdex] = recv_status.MPI_SOURCE; 604 lens[imdex] = n; 605 slen += n; 606 count--; 607 } 608 PetscFree(recv_waits); 609 610 /* move the data into the send scatter */ 611 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 612 count = 0; 613 for ( i=0; i<nrecvs; i++ ) { 614 values = rvalues + i*nmax; 615 for ( j=0; j<lens[i]; j++ ) { 616 lrows[count++] = values[j] - base; 617 } 618 } 619 PetscFree(rvalues); PetscFree(lens); 620 PetscFree(owner); PetscFree(nprocs); 621 622 /* actually zap the local rows */ 623 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 624 PLogObjectParent(A,istmp); 625 626 /* 627 Zero the required rows. If the "diagonal block" of the matrix 628 is square and the user wishes to set the diagonal we use seperate 629 code so that MatSetValues() is not called for each diagonal allocating 630 new memory, thus calling lots of mallocs and slowing things down. 
631 632 Contributed by: Mathew Knepley 633 */ 634 localdiag = PETSC_FALSE; 635 if (diag && (l->A->M == l->A->N)) { 636 localdiag = PETSC_TRUE; 637 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 638 } else { 639 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 640 } 641 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 642 ierr = ISDestroy(istmp); CHKERRQ(ierr); 643 644 if (diag && (localdiag == PETSC_FALSE)) { 645 for ( i = 0; i < slen; i++ ) { 646 row = lrows[i] + rstart; 647 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 648 } 649 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 650 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 651 } 652 653 if (diag) { 654 for ( i = 0; i < slen; i++ ) { 655 row = lrows[i] + rstart; 656 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 657 } 658 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 659 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 660 } 661 PetscFree(lrows); 662 663 /* wait on sends */ 664 if (nsends) { 665 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 666 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 667 PetscFree(send_status); 668 } 669 PetscFree(send_waits); PetscFree(svalues); 670 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNC__ 675 #define __FUNC__ "MatMult_MPIAIJ" 676 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 679 int ierr,nt; 680 681 PetscFunctionBegin; 682 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 683 if (nt != a->n) { 684 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 685 } 686 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 687 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 688 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 689 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNC__ 694 #define __FUNC__ "MatMultAdd_MPIAIJ" 695 int 
MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* z = A*x + y, overlapping the ghost scatter with the local multiply-add */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
/* y = A^T * x: off-diagonal transpose product is scattered (reverse, add)
   into the other processors' portions of y. */
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* z = A^T * x + y, same communication pattern as MatMultTrans_MPIAIJ(). */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
/* Scales the whole matrix by *aa by scaling both sequential blocks. */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__
"MatDestroy_MPIAIJ"
/* Releases all storage owned by the parallel matrix (reference counted). */
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
/* Binary save; currently only implemented for a single processor. */
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
/*
   ASCII/Draw/Matlab viewing.  Info formats print per-processor
   statistics; otherwise the whole matrix is gathered onto processor 0
   (memory intensive!) and viewed there.
*/
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank ;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar     *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part: temporarily globalize the column indices in place */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* restore the local indexing of aij->A's column array */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part: map compacted columns back to global via garray */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
    }
    ierr = MatDestroy(A); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
/* Dispatches viewing to the ASCII/Draw/Matlab or binary implementation. */
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

PetscFunctionBegin; 948 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 949 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 950 PetscTypeCompare(vtype,MATLAB_VIEWER)) { 951 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 952 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 953 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 954 } else { 955 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 956 } 957 PetscFunctionReturn(0); 958 } 959 960 /* 961 This has to provide several versions. 962 963 2) a) use only local smoothing updating outer values only once. 964 b) local smoothing updating outer values each inner iteration 965 3) color updating out values betwen colors. 966 */ 967 #undef __FUNC__ 968 #define __FUNC__ "MatRelax_MPIAIJ" 969 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 970 double fshift,int its,Vec xx) 971 { 972 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 973 Mat AA = mat->A, BB = mat->B; 974 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 975 Scalar *b,*x,*xs,*ls,d,*v,sum; 976 int ierr,*idx, *diag; 977 int n = mat->n, m = mat->m, i,shift = A->indexshift; 978 979 PetscFunctionBegin; 980 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 981 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 982 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 983 xs = x + shift; /* shift by one for index start of 1 */ 984 ls = ls + shift; 985 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 986 diag = A->diag; 987 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 988 if (flag & SOR_ZERO_INITIAL_GUESS) { 989 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 990 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 991 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 992 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 ierr = 
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 996 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 997 while (its--) { 998 /* go down through the rows */ 999 for ( i=0; i<m; i++ ) { 1000 n = A->i[i+1] - A->i[i]; 1001 PLogFlops(4*n+3); 1002 idx = A->j + A->i[i] + shift; 1003 v = A->a + A->i[i] + shift; 1004 sum = b[i]; 1005 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1006 d = fshift + A->a[diag[i]+shift]; 1007 n = B->i[i+1] - B->i[i]; 1008 idx = B->j + B->i[i] + shift; 1009 v = B->a + B->i[i] + shift; 1010 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1011 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1012 } 1013 /* come up through the rows */ 1014 for ( i=m-1; i>-1; i-- ) { 1015 n = A->i[i+1] - A->i[i]; 1016 PLogFlops(4*n+3) 1017 idx = A->j + A->i[i] + shift; 1018 v = A->a + A->i[i] + shift; 1019 sum = b[i]; 1020 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1021 d = fshift + A->a[diag[i]+shift]; 1022 n = B->i[i+1] - B->i[i]; 1023 idx = B->j + B->i[i] + shift; 1024 v = B->a + B->i[i] + shift; 1025 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1026 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1027 } 1028 } 1029 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1030 if (flag & SOR_ZERO_INITIAL_GUESS) { 1031 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1032 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1033 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1034 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1038 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1039 while (its--) { 1040 for ( i=0; i<m; i++ ) { 1041 n = A->i[i+1] - A->i[i]; 1042 PLogFlops(4*n+3); 1043 idx = A->j + A->i[i] + shift; 1044 v = A->a + A->i[i] + shift; 1045 sum = b[i]; 1046 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1047 d = fshift + A->a[diag[i]+shift]; 1048 n = B->i[i+1] - B->i[i]; 1049 idx = B->j + B->i[i] + shift; 1050 v = B->a + B->i[i] + shift; 1051 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1052 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1053 } 1054 } 1055 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1056 if (flag & SOR_ZERO_INITIAL_GUESS) { 1057 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1058 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1059 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1060 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1064 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1065 while (its--) { 1066 for ( i=m-1; i>-1; i-- ) { 1067 n = A->i[i+1] - A->i[i]; 1068 PLogFlops(4*n+3); 1069 idx = A->j + A->i[i] + shift; 1070 v = A->a + A->i[i] + shift; 1071 sum = b[i]; 1072 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1073 d = fshift + A->a[diag[i]+shift]; 1074 n = B->i[i+1] - B->i[i]; 1075 idx = B->j + B->i[i] + shift; 1076 v = B->a + B->i[i] + shift; 1077 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1078 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1079 } 1080 } 1081 } else { 1082 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1083 } 1084 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1085 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1086 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNC__ 1091 #define __FUNC__ "MatGetInfo_MPIAIJ" 1092 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1093 { 1094 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1095 Mat A = mat->A, B = mat->B; 1096 int ierr; 1097 double isend[5], irecv[5]; 1098 1099 PetscFunctionBegin; 1100 info->block_size = 1.0; 1101 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1102 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1103 isend[3] = info->memory; isend[4] = info->mallocs; 1104 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1105 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1106 isend[3] += info->memory; isend[4] += info->mallocs; 1107 if (flag == MAT_LOCAL) { 1108 info->nz_used = isend[0]; 1109 info->nz_allocated = isend[1]; 1110 info->nz_unneeded = isend[2]; 1111 info->memory = isend[3]; 1112 info->mallocs = isend[4]; 1113 } else if (flag == MAT_GLOBAL_MAX) { 1114 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1115 info->nz_used = irecv[0]; 1116 info->nz_allocated = irecv[1]; 1117 info->nz_unneeded = irecv[2]; 1118 info->memory = irecv[3]; 1119 info->mallocs = irecv[4]; 1120 } else if (flag == MAT_GLOBAL_SUM) { 1121 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1122 info->nz_used = irecv[0]; 1123 info->nz_allocated = irecv[1]; 1124 info->nz_unneeded = irecv[2]; 1125 info->memory = irecv[3]; 1126 info->mallocs = irecv[4]; 1127 } 1128 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1129 info->fill_ratio_needed = 0; 1130 
info->factor_mallocs = 0; 1131 info->rows_global = (double)mat->M; 1132 info->columns_global = (double)mat->N; 1133 info->rows_local = (double)mat->m; 1134 info->columns_local = (double)mat->N; 1135 1136 PetscFunctionReturn(0); 1137 } 1138 1139 #undef __FUNC__ 1140 #define __FUNC__ "MatSetOption_MPIAIJ" 1141 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1142 { 1143 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1144 1145 PetscFunctionBegin; 1146 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1147 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1148 op == MAT_COLUMNS_UNSORTED || 1149 op == MAT_COLUMNS_SORTED || 1150 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1151 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1152 MatSetOption(a->A,op); 1153 MatSetOption(a->B,op); 1154 } else if (op == MAT_ROW_ORIENTED) { 1155 a->roworiented = 1; 1156 MatSetOption(a->A,op); 1157 MatSetOption(a->B,op); 1158 } else if (op == MAT_ROWS_SORTED || 1159 op == MAT_ROWS_UNSORTED || 1160 op == MAT_SYMMETRIC || 1161 op == MAT_STRUCTURALLY_SYMMETRIC || 1162 op == MAT_YES_NEW_DIAGONALS) 1163 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1164 else if (op == MAT_COLUMN_ORIENTED) { 1165 a->roworiented = 0; 1166 MatSetOption(a->A,op); 1167 MatSetOption(a->B,op); 1168 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1169 a->donotstash = 1; 1170 } else if (op == MAT_NO_NEW_DIAGONALS){ 1171 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1172 } else { 1173 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1174 } 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ "MatGetSize_MPIAIJ" 1180 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1181 { 1182 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1183 1184 PetscFunctionBegin; 1185 if (m) *m = mat->M; 1186 if (n) *n = mat->N; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNC__ 1191 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1192 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1193 { 1194 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1195 1196 
PetscFunctionBegin; 1197 if (m) *m = mat->m; 1198 if (n) *n = mat->n; 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNC__ 1203 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1204 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1205 { 1206 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1207 1208 PetscFunctionBegin; 1209 *m = mat->rstart; *n = mat->rend; 1210 PetscFunctionReturn(0); 1211 } 1212 1213 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1214 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1215 1216 #undef __FUNC__ 1217 #define __FUNC__ "MatGetRow_MPIAIJ" 1218 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1219 { 1220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1221 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1222 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1223 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1224 int *cmap, *idx_p; 1225 1226 PetscFunctionBegin; 1227 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1228 mat->getrowactive = PETSC_TRUE; 1229 1230 if (!mat->rowvalues && (idx || v)) { 1231 /* 1232 allocate enough space to hold information from the longest row. 
1233 */ 1234 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1235 int max = 1,m = mat->m,tmp; 1236 for ( i=0; i<m; i++ ) { 1237 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1238 if (max < tmp) { max = tmp; } 1239 } 1240 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1241 mat->rowindices = (int *) (mat->rowvalues + max); 1242 } 1243 1244 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1245 lrow = row - rstart; 1246 1247 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1248 if (!v) {pvA = 0; pvB = 0;} 1249 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1250 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1251 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1252 nztot = nzA + nzB; 1253 1254 cmap = mat->garray; 1255 if (v || idx) { 1256 if (nztot) { 1257 /* Sort by increasing column numbers, assuming A and B already sorted */ 1258 int imark = -1; 1259 if (v) { 1260 *v = v_p = mat->rowvalues; 1261 for ( i=0; i<nzB; i++ ) { 1262 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1263 else break; 1264 } 1265 imark = i; 1266 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1267 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1268 } 1269 if (idx) { 1270 *idx = idx_p = mat->rowindices; 1271 if (imark > -1) { 1272 for ( i=0; i<imark; i++ ) { 1273 idx_p[i] = cmap[cworkB[i]]; 1274 } 1275 } else { 1276 for ( i=0; i<nzB; i++ ) { 1277 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1278 else break; 1279 } 1280 imark = i; 1281 } 1282 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1283 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1284 } 1285 } else { 1286 if (idx) *idx = 0; 1287 if (v) *v = 0; 1288 } 1289 } 1290 *nz = nztot; 1291 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1292 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNC__ 1297 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1298 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1299 { 1300 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1301 1302 PetscFunctionBegin; 1303 if (aij->getrowactive == PETSC_FALSE) { 1304 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1305 } 1306 aij->getrowactive = PETSC_FALSE; 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNC__ 1311 #define __FUNC__ "MatNorm_MPIAIJ" 1312 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1315 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1316 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1317 double sum = 0.0; 1318 Scalar *v; 1319 1320 PetscFunctionBegin; 1321 if (aij->size == 1) { 1322 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1323 } else { 1324 if (type == NORM_FROBENIUS) { 1325 v = amat->a; 1326 for (i=0; i<amat->nz; i++ ) { 1327 #if defined(USE_PETSC_COMPLEX) 1328 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1329 #else 1330 sum += (*v)*(*v); v++; 1331 #endif 1332 } 1333 v = bmat->a; 1334 for (i=0; i<bmat->nz; i++ ) { 1335 #if defined(USE_PETSC_COMPLEX) 1336 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1337 #else 1338 sum += (*v)*(*v); v++; 1339 #endif 1340 } 1341 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1342 *norm = sqrt(*norm); 1343 } else if (type == NORM_1) { /* max column norm */ 1344 double *tmp, *tmp2; 1345 int *jj, *garray = aij->garray; 1346 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1347 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1348 PetscMemzero(tmp,aij->N*sizeof(double)); 1349 *norm = 0.0; 1350 v = amat->a; jj = amat->j; 1351 for ( j=0; j<amat->nz; j++ ) { 1352 tmp[cstart + *jj++ + 
shift] += PetscAbsScalar(*v); v++; 1353 } 1354 v = bmat->a; jj = bmat->j; 1355 for ( j=0; j<bmat->nz; j++ ) { 1356 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1357 } 1358 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1359 for ( j=0; j<aij->N; j++ ) { 1360 if (tmp2[j] > *norm) *norm = tmp2[j]; 1361 } 1362 PetscFree(tmp); PetscFree(tmp2); 1363 } else if (type == NORM_INFINITY) { /* max row norm */ 1364 double ntemp = 0.0; 1365 for ( j=0; j<amat->m; j++ ) { 1366 v = amat->a + amat->i[j] + shift; 1367 sum = 0.0; 1368 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1369 sum += PetscAbsScalar(*v); v++; 1370 } 1371 v = bmat->a + bmat->i[j] + shift; 1372 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1373 sum += PetscAbsScalar(*v); v++; 1374 } 1375 if (sum > ntemp) ntemp = sum; 1376 } 1377 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1378 } else { 1379 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1380 } 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNC__ 1386 #define __FUNC__ "MatTranspose_MPIAIJ" 1387 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1388 { 1389 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1390 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1391 int ierr,shift = Aloc->indexshift; 1392 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1393 Mat B; 1394 Scalar *array; 1395 1396 PetscFunctionBegin; 1397 if (matout == PETSC_NULL && M != N) { 1398 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1399 } 1400 1401 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1402 1403 /* copy over the A part */ 1404 Aloc = (Mat_SeqAIJ*) a->A->data; 1405 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1406 row = a->rstart; 1407 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1408 for ( i=0; i<m; i++ ) { 1409 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1410 row++; array += 
ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1411 } 1412 aj = Aloc->j; 1413 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1414 1415 /* copy over the B part */ 1416 Aloc = (Mat_SeqAIJ*) a->B->data; 1417 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1418 row = a->rstart; 1419 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1420 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1421 for ( i=0; i<m; i++ ) { 1422 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1423 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1424 } 1425 PetscFree(ct); 1426 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1427 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1428 if (matout != PETSC_NULL) { 1429 *matout = B; 1430 } else { 1431 PetscOps *Abops; 1432 struct _MatOps *Aops; 1433 1434 /* This isn't really an in-place transpose .... but free data structures from a */ 1435 PetscFree(a->rowners); 1436 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1437 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1438 #if defined (USE_CTABLE) 1439 if (a->colmap) TableDelete(a->colmap); 1440 #else 1441 if (a->colmap) PetscFree(a->colmap); 1442 #endif 1443 if (a->garray) PetscFree(a->garray); 1444 if (a->lvec) VecDestroy(a->lvec); 1445 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1446 PetscFree(a); 1447 1448 /* 1449 This is horrible, horrible code. We need to keep the 1450 A pointers for the bops and ops but copy everything 1451 else from C. 
1452 */ 1453 Abops = A->bops; 1454 Aops = A->ops; 1455 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1456 A->bops = Abops; 1457 A->ops = Aops; 1458 PetscHeaderDestroy(B); 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNC__ 1464 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1465 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1466 { 1467 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1468 Mat a = aij->A, b = aij->B; 1469 int ierr,s1,s2,s3; 1470 1471 PetscFunctionBegin; 1472 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1473 if (rr) { 1474 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1475 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1476 /* Overlap communication with computation. */ 1477 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1478 } 1479 if (ll) { 1480 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1481 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1482 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1483 } 1484 /* scale the diagonal block */ 1485 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1486 1487 if (rr) { 1488 /* Do a scatter end and then right scale the off-diagonal block */ 1489 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1490 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1491 } 1492 1493 PetscFunctionReturn(0); 1494 } 1495 1496 1497 extern int MatPrintHelp_SeqAIJ(Mat); 1498 #undef __FUNC__ 1499 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1500 int MatPrintHelp_MPIAIJ(Mat A) 1501 { 1502 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1503 int ierr; 1504 1505 PetscFunctionBegin; 1506 if (!a->rank) { 1507 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNC__ 1513 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1514 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1515 { 1516 PetscFunctionBegin; 1517 *bs 
= 1; 1518 PetscFunctionReturn(0); 1519 } 1520 #undef __FUNC__ 1521 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1522 int MatSetUnfactored_MPIAIJ(Mat A) 1523 { 1524 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1525 int ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNC__ 1533 #define __FUNC__ "MatEqual_MPIAIJ" 1534 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1535 { 1536 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1537 Mat a, b, c, d; 1538 PetscTruth flg; 1539 int ierr; 1540 1541 PetscFunctionBegin; 1542 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1543 a = matA->A; b = matA->B; 1544 c = matB->A; d = matB->B; 1545 1546 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1547 if (flg == PETSC_TRUE) { 1548 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1549 } 1550 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 #undef __FUNC__ 1555 #define __FUNC__ "MatCopy_MPIAIJ" 1556 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1557 { 1558 int ierr; 1559 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1560 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1561 1562 PetscFunctionBegin; 1563 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1564 /* because of the column compression in the off-processor part of the matrix a->B, 1565 the number of columns in a->B and b->B may be different, hence we cannot call 1566 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1567 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1568 then copying the submatrices */ 1569 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1570 } else { 1571 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1572 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1573 } 1574 PetscFunctionReturn(0); 1575 } 1576 1577 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1578 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1579 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1580 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1581 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1582 1583 /* -------------------------------------------------------------------*/ 1584 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1585 MatGetRow_MPIAIJ, 1586 MatRestoreRow_MPIAIJ, 1587 MatMult_MPIAIJ, 1588 MatMultAdd_MPIAIJ, 1589 MatMultTrans_MPIAIJ, 1590 MatMultTransAdd_MPIAIJ, 1591 0, 1592 0, 1593 0, 1594 0, 1595 0, 1596 0, 1597 MatRelax_MPIAIJ, 1598 MatTranspose_MPIAIJ, 1599 MatGetInfo_MPIAIJ, 1600 MatEqual_MPIAIJ, 1601 MatGetDiagonal_MPIAIJ, 1602 MatDiagonalScale_MPIAIJ, 1603 MatNorm_MPIAIJ, 1604 MatAssemblyBegin_MPIAIJ, 1605 MatAssemblyEnd_MPIAIJ, 1606 0, 1607 MatSetOption_MPIAIJ, 1608 MatZeroEntries_MPIAIJ, 1609 MatZeroRows_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 0, 1614 MatGetSize_MPIAIJ, 1615 MatGetLocalSize_MPIAIJ, 1616 MatGetOwnershipRange_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 0, 1621 MatDuplicate_MPIAIJ, 1622 0, 1623 0, 1624 0, 1625 0, 1626 0, 1627 MatGetSubMatrices_MPIAIJ, 1628 MatIncreaseOverlap_MPIAIJ, 1629 MatGetValues_MPIAIJ, 1630 MatCopy_MPIAIJ, 1631 MatPrintHelp_MPIAIJ, 1632 MatScale_MPIAIJ, 1633 0, 1634 0, 1635 0, 1636 MatGetBlockSize_MPIAIJ, 1637 0, 1638 0, 1639 0, 1640 0, 1641 MatFDColoringCreate_MPIAIJ, 1642 0, 1643 MatSetUnfactored_MPIAIJ, 1644 0, 1645 0, 1646 MatGetSubMatrix_MPIAIJ, 1647 0, 
1648 0, 1649 MatGetMaps_Petsc}; 1650 1651 /* ----------------------------------------------------------------------------------------*/ 1652 1653 EXTERN_C_BEGIN 1654 #undef __FUNC__ 1655 #define __FUNC__ "MatStoreValues_MPIAIJ" 1656 int MatStoreValues_MPIAIJ(Mat mat) 1657 { 1658 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1659 int ierr; 1660 1661 PetscFunctionBegin; 1662 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1663 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1664 PetscFunctionReturn(0); 1665 } 1666 EXTERN_C_END 1667 1668 EXTERN_C_BEGIN 1669 #undef __FUNC__ 1670 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1671 int MatRetrieveValues_MPIAIJ(Mat mat) 1672 { 1673 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1674 int ierr; 1675 1676 PetscFunctionBegin; 1677 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1678 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 EXTERN_C_END 1682 1683 #include "pc.h" 1684 EXTERN_C_BEGIN 1685 extern int PCSetUp_BJacobi_AIJ(PC); 1686 EXTERN_C_END 1687 1688 #undef __FUNC__ 1689 #define __FUNC__ "MatCreateMPIAIJ" 1690 /*@C 1691 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1692 (the default parallel PETSc format). For good matrix assembly performance 1693 the user should preallocate the matrix storage by setting the parameters 1694 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1695 performance can be increased by more than a factor of 50. 1696 1697 Collective on MPI_Comm 1698 1699 Input Parameters: 1700 + comm - MPI communicator 1701 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1702 This value should be the same as the local size used in creating the 1703 y vector for the matrix-vector product y = Ax. 1704 . n - This value should be the same as the local size used in creating the 1705 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
              (defaults to using SeqAIJ format on one processor)


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
 This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 1829 34 values. 1830 1831 When d_nnz, o_nnz parameters are specified, the storage is specified 1832 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1833 In the above case the values for d_nnz,o_nnz are: 1834 .vb 1835 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1836 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1837 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1838 .ve 1839 Here the space allocated is sum of all the above values i.e 34, and 1840 hence pre-allocation is perfect. 1841 1842 .keywords: matrix, aij, compressed row, sparse, parallel 1843 1844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1845 @*/ 1846 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1847 { 1848 Mat B; 1849 Mat_MPIAIJ *b; 1850 int ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0; 1851 1852 PetscFunctionBegin; 1853 MPI_Comm_size(comm,&size); 1854 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1855 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1856 if (!flag1 && !flag2 && size == 1) { 1857 if (M == PETSC_DECIDE) M = m; 1858 if (N == PETSC_DECIDE) N = n; 1859 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 *A = 0; 1864 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1865 PLogObjectCreate(B); 1866 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1867 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1868 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1869 B->ops->destroy = MatDestroy_MPIAIJ; 1870 B->ops->view = MatView_MPIAIJ; 1871 B->factor = 0; 1872 B->assembled = PETSC_FALSE; 1873 B->mapping = 0; 1874 1875 B->insertmode = NOT_SET_VALUES; 1876 b->size = size; 1877 MPI_Comm_rank(comm,&b->rank); 1878 1879 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1880 
SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1881 } 1882 1883 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1884 work[0] = m; work[1] = n; 1885 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1886 if (M == PETSC_DECIDE) M = sum[0]; 1887 if (N == PETSC_DECIDE) N = sum[1]; 1888 } 1889 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1890 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1891 b->m = m; B->m = m; 1892 b->n = n; B->n = n; 1893 b->N = N; B->N = N; 1894 b->M = M; B->M = M; 1895 1896 /* the information in the maps duplicates the information computed below, eventually 1897 we should remove the duplicate information that is not contained in the maps */ 1898 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1899 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1900 1901 /* build local table of row and column ownerships */ 1902 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1903 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1904 b->cowners = b->rowners + b->size + 2; 1905 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1906 b->rowners[0] = 0; 1907 for ( i=2; i<=b->size; i++ ) { 1908 b->rowners[i] += b->rowners[i-1]; 1909 } 1910 b->rstart = b->rowners[b->rank]; 1911 b->rend = b->rowners[b->rank+1]; 1912 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1913 b->cowners[0] = 0; 1914 for ( i=2; i<=b->size; i++ ) { 1915 b->cowners[i] += b->cowners[i-1]; 1916 } 1917 b->cstart = b->cowners[b->rank]; 1918 b->cend = b->cowners[b->rank+1]; 1919 1920 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1921 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1922 PLogObjectParent(B,b->A); 1923 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1924 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1925 
PLogObjectParent(B,b->B); 1926 1927 /* build cache for off array entries formed */ 1928 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1929 b->donotstash = 0; 1930 b->colmap = 0; 1931 b->garray = 0; 1932 b->roworiented = 1; 1933 1934 /* stuff used for matrix vector multiply */ 1935 b->lvec = 0; 1936 b->Mvctx = 0; 1937 1938 /* stuff for MatGetRow() */ 1939 b->rowindices = 0; 1940 b->rowvalues = 0; 1941 b->getrowactive = PETSC_FALSE; 1942 1943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1944 "MatStoreValues_MPIAIJ", 1945 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1947 "MatRetrieveValues_MPIAIJ", 1948 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1949 ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 1950 "PCSetUp_BJacobi_AIJ", 1951 (void*)PCSetUp_BJacobi_AIJ);CHKERRQ(ierr); 1952 *A = B; 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNC__ 1957 #define __FUNC__ "MatDuplicate_MPIAIJ" 1958 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1959 { 1960 Mat mat; 1961 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1962 int ierr, len=0, flg; 1963 1964 PetscFunctionBegin; 1965 *newmat = 0; 1966 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1967 PLogObjectCreate(mat); 1968 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1969 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1970 mat->ops->destroy = MatDestroy_MPIAIJ; 1971 mat->ops->view = MatView_MPIAIJ; 1972 mat->factor = matin->factor; 1973 mat->assembled = PETSC_TRUE; 1974 1975 a->m = mat->m = oldmat->m; 1976 a->n = mat->n = oldmat->n; 1977 a->M = mat->M = oldmat->M; 1978 a->N = mat->N = oldmat->N; 1979 1980 a->rstart = oldmat->rstart; 1981 a->rend = oldmat->rend; 1982 a->cstart = oldmat->cstart; 1983 a->cend = oldmat->cend; 1984 a->size = oldmat->size; 1985 a->rank = 
oldmat->rank; 1986 mat->insertmode = NOT_SET_VALUES; 1987 a->rowvalues = 0; 1988 a->getrowactive = PETSC_FALSE; 1989 1990 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1991 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1992 a->cowners = a->rowners + a->size + 2; 1993 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1994 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1995 if (oldmat->colmap) { 1996 #if defined (USE_CTABLE) 1997 ierr = TableCreateCopy( &a->colmap, oldmat->colmap ); CHKERRQ(ierr); 1998 #else 1999 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 2000 PLogObjectMemory(mat,(a->N)*sizeof(int)); 2001 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 2002 #endif 2003 } else a->colmap = 0; 2004 if (oldmat->garray) { 2005 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 2006 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 2007 PLogObjectMemory(mat,len*sizeof(int)); 2008 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 2009 } else a->garray = 0; 2010 2011 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 2012 PLogObjectParent(mat,a->lvec); 2013 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 2014 PLogObjectParent(mat,a->Mvctx); 2015 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 2016 PLogObjectParent(mat,a->A); 2017 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 2018 PLogObjectParent(mat,a->B); 2019 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 2020 if (flg) { 2021 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 2022 } 2023 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 2024 *newmat = mat; 2025 PetscFunctionReturn(0); 2026 } 2027 2028 #include "sys.h" 2029 2030 #undef __FUNC__ 2031 #define __FUNC__ "MatLoad_MPIAIJ" 2032 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2033 { 2034 Mat A; 2035 Scalar 
*vals,*svals; 2036 MPI_Comm comm = ((PetscObject)viewer)->comm; 2037 MPI_Status status; 2038 int i, nz, ierr, j,rstart, rend, fd; 2039 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2040 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2041 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2042 2043 PetscFunctionBegin; 2044 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2045 if (!rank) { 2046 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2047 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2048 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2049 if (header[3] < 0) { 2050 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2051 } 2052 } 2053 2054 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2055 M = header[1]; N = header[2]; 2056 /* determine ownership of all rows */ 2057 m = M/size + ((M % size) > rank); 2058 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2059 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2060 rowners[0] = 0; 2061 for ( i=2; i<=size; i++ ) { 2062 rowners[i] += rowners[i-1]; 2063 } 2064 rstart = rowners[rank]; 2065 rend = rowners[rank+1]; 2066 2067 /* distribute row lengths to all processors */ 2068 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2069 offlens = ourlens + (rend-rstart); 2070 if (!rank) { 2071 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2072 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2073 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2074 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2075 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2076 PetscFree(sndcounts); 2077 } else { 2078 ierr = 
MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2079 } 2080 2081 if (!rank) { 2082 /* calculate the number of nonzeros on each processor */ 2083 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2084 PetscMemzero(procsnz,size*sizeof(int)); 2085 for ( i=0; i<size; i++ ) { 2086 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2087 procsnz[i] += rowlengths[j]; 2088 } 2089 } 2090 PetscFree(rowlengths); 2091 2092 /* determine max buffer needed and allocate it */ 2093 maxnz = 0; 2094 for ( i=0; i<size; i++ ) { 2095 maxnz = PetscMax(maxnz,procsnz[i]); 2096 } 2097 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2098 2099 /* read in my part of the matrix column indices */ 2100 nz = procsnz[0]; 2101 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2102 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2103 2104 /* read in every one elses and ship off */ 2105 for ( i=1; i<size; i++ ) { 2106 nz = procsnz[i]; 2107 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2108 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2109 } 2110 PetscFree(cols); 2111 } else { 2112 /* determine buffer space needed for message */ 2113 nz = 0; 2114 for ( i=0; i<m; i++ ) { 2115 nz += ourlens[i]; 2116 } 2117 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2118 2119 /* receive message of column indices*/ 2120 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2121 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2122 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2123 } 2124 2125 /* determine column ownership if matrix is not square */ 2126 if (N != M) { 2127 n = N/size + ((N % size) > rank); 2128 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2129 cstart = cend - n; 2130 } else { 2131 cstart = rstart; 2132 cend = rend; 2133 n = cend - cstart; 2134 } 2135 2136 /* loop over local rows, 
determining number of off diagonal entries */ 2137 PetscMemzero(offlens,m*sizeof(int)); 2138 jj = 0; 2139 for ( i=0; i<m; i++ ) { 2140 for ( j=0; j<ourlens[i]; j++ ) { 2141 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2142 jj++; 2143 } 2144 } 2145 2146 /* create our matrix */ 2147 for ( i=0; i<m; i++ ) { 2148 ourlens[i] -= offlens[i]; 2149 } 2150 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2151 A = *newmat; 2152 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2153 for ( i=0; i<m; i++ ) { 2154 ourlens[i] += offlens[i]; 2155 } 2156 2157 if (!rank) { 2158 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2159 2160 /* read in my part of the matrix numerical values */ 2161 nz = procsnz[0]; 2162 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2163 2164 /* insert into matrix */ 2165 jj = rstart; 2166 smycols = mycols; 2167 svals = vals; 2168 for ( i=0; i<m; i++ ) { 2169 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2170 smycols += ourlens[i]; 2171 svals += ourlens[i]; 2172 jj++; 2173 } 2174 2175 /* read in other processors and ship out */ 2176 for ( i=1; i<size; i++ ) { 2177 nz = procsnz[i]; 2178 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2179 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2180 } 2181 PetscFree(procsnz); 2182 } else { 2183 /* receive numeric values */ 2184 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2185 2186 /* receive message of values*/ 2187 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2188 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2189 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2190 2191 /* insert into matrix */ 2192 jj = rstart; 2193 smycols = mycols; 2194 svals = vals; 2195 for ( i=0; i<m; i++ ) { 2196 ierr = 
MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2197 smycols += ourlens[i]; 2198 svals += ourlens[i]; 2199 jj++; 2200 } 2201 } 2202 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2203 2204 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2205 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNC__ 2210 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2211 /* 2212 Not great since it makes two copies of the submatrix, first an SeqAIJ 2213 in local and then by concatenating the local matrices the end result. 2214 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2215 */ 2216 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 2217 { 2218 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2219 Mat *local,M, Mreuse; 2220 Scalar *vwork,*aa; 2221 MPI_Comm comm = mat->comm; 2222 Mat_SeqAIJ *aij; 2223 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2224 2225 PetscFunctionBegin; 2226 MPI_Comm_rank(comm,&rank); 2227 MPI_Comm_size(comm,&size); 2228 2229 if (call == MAT_REUSE_MATRIX) { 2230 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2231 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2232 local = &Mreuse; 2233 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2234 } else { 2235 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2236 Mreuse = *local; 2237 PetscFree(local); 2238 } 2239 2240 /* 2241 m - number of local rows 2242 n - number of columns (same on all processors) 2243 rstart - first row in new global matrix generated 2244 */ 2245 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2246 if (call == MAT_INITIAL_MATRIX) { 2247 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2248 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No 
support for index shifted matrix"); 2249 ii = aij->i; 2250 jj = aij->j; 2251 2252 /* 2253 Determine the number of non-zeros in the diagonal and off-diagonal 2254 portions of the matrix in order to do correct preallocation 2255 */ 2256 2257 /* first get start and end of "diagonal" columns */ 2258 if (csize == PETSC_DECIDE) { 2259 nlocal = n/size + ((n % size) > rank); 2260 } else { 2261 nlocal = csize; 2262 } 2263 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2264 rstart = rend - nlocal; 2265 if (rank == size - 1 && rend != n) { 2266 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2267 } 2268 2269 /* next, compute all the lengths */ 2270 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2271 olens = dlens + m; 2272 for ( i=0; i<m; i++ ) { 2273 jend = ii[i+1] - ii[i]; 2274 olen = 0; 2275 dlen = 0; 2276 for ( j=0; j<jend; j++ ) { 2277 if ( *jj < rstart || *jj >= rend) olen++; 2278 else dlen++; 2279 jj++; 2280 } 2281 olens[i] = olen; 2282 dlens[i] = dlen; 2283 } 2284 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2285 PetscFree(dlens); 2286 } else { 2287 int ml,nl; 2288 2289 M = *newmat; 2290 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2291 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2292 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2293 /* 2294 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2295 rather than the slower MatSetValues(). 
2296 */ 2297 M->was_assembled = PETSC_TRUE; 2298 M->assembled = PETSC_FALSE; 2299 } 2300 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2301 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2302 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2303 ii = aij->i; 2304 jj = aij->j; 2305 aa = aij->a; 2306 for (i=0; i<m; i++) { 2307 row = rstart + i; 2308 nz = ii[i+1] - ii[i]; 2309 cwork = jj; jj += nz; 2310 vwork = aa; aa += nz; 2311 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2312 } 2313 2314 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2315 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2316 *newmat = M; 2317 2318 /* save submatrix used in processor for next request */ 2319 if (call == MAT_INITIAL_MATRIX) { 2320 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2321 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2322 } 2323 2324 PetscFunctionReturn(0); 2325 } 2326