#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.224 1997/11/03 04:45:30 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

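/*
   Note: MatSetValues_SeqAIJ_B_Private below is the same insertion logic as
   MatSetValues_SeqAIJ_A_Private above, applied to the off-diagonal block
   (the b* arrays and the b_noinsert label) instead of the diagonal block.
*/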
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(1,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,0,"Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row index,global column index,value); we store the indices
          as Scalars (doubles) to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

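/*
   MatAssemblyEnd_MPIAIJ below waits on the receives posted above, unpacks
   each (row, column, value) triple, and routes the value to either the
   diagonal block aij->A or the off-diagonal block aij->B.
*/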
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat        mat = (Mat) obj;
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing updating outer values only once.
        b) local smoothing updating outer values each inner iteration
     3) color updating outer values between colors.
*/
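/*
   The sweeps below implement the purely local smoothing variant: the
   off-processor values are gathered once into mat->lvec at the start of the
   call and then held fixed while the local rows are relaxed.
*/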
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(1,0,"Option not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,0,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(1,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(1,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,0,"left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(1,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$           0 1 2 3 4 5 6 7 8 9 10 11
$          -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$          -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
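
   Example (illustrative only; the global size and the preallocation counts
   used below are assumed values, not recommendations):
$      Mat A;
$      /* 1000 x 1000 global matrix, row layout chosen by PETSc; guess   */
$      /* 5 nonzeros per row in the diagonal block and 2 per row in the  */
$      /* off-diagonal block                                             */
$      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                             1000,1000,5,PETSC_NULL,2,PETSC_NULL,&A);
$      CHKERRQ(ierr);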

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIAIJ;
  B->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data      = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy   = MatDestroy_MPIAIJ;
  mat->view      = MatView_MPIAIJ;
  mat->factor    = matin->factor;
  mat->assembled = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}
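
/*
    Worked example of the ownership layout computed in MatCreateMPIAIJ()
    above (illustrative numbers only): with M = 10 global rows on size = 3
    processes, each process gets m = M/size + ((M % size) > rank) local rows,
    i.e. 4, 3, 3.  After the MPI_Allgather() and prefix sum, rowners[] =
    {0, 4, 7, 10}, so process 1 owns global rows 4 through 6 (rstart = 4,
    rend = 7).  The column ownership in cowners[] is built the same way from n.
*/

/*
    MatConvertSameType_MPIAIJ() is the copy routine used when a same-type
    conversion is requested; the MatConvert() calls with MATSAME above copy
    the diagonal and off-diagonal blocks the same way.  A minimal sketch of
    the user-level call, assuming A is an assembled MPIAIJ matrix:

       Mat Acopy;
       int ierr;
       ierr = MatConvert(A,MATSAME,&Acopy); CHKERRQ(ierr);
       ...
       ierr = MatDestroy(Acopy); CHKERRQ(ierr);
*/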
#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(1,1,"Matrix stored in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
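
/*
    A minimal sketch of loading an MPIAIJ matrix from a PETSc binary file.
    The binary-viewer creation routine and its argument order are quoted
    from memory and may differ in a particular release; "matrix.dat" is a
    made-up file name.

       Mat    A;
       Viewer viewer;
       int    ierr;
       ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/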
#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  in local, and then the end result obtained by concatenating the local
  matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (*local)->data;
    if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    nlocal = n/size + ((n % size) > rank);
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(1,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (*local)->data;
  if (aij->indexshift) SETERRQ(1,1,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;
  PetscFunctionReturn(0);
}
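
/*
    A minimal sketch of gathering a sequential copy of the locally owned
    rows (all columns) with MatGetSubMatrices(), mirroring the calls made in
    MatGetSubMatrix_MPIAIJ() above.  Here mat is an assembled parallel matrix;
    the stride index sets are illustrative, and the ISCreateStride() argument
    order is quoted from memory.

       Mat *local;
       IS  isrow,iscol;
       int ierr,rstart,rend,Mglobal,Nglobal;

       ierr = MatGetOwnershipRange(mat,&rstart,&rend); CHKERRQ(ierr);
       ierr = MatGetSize(mat,&Mglobal,&Nglobal); CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow); CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_SELF,Nglobal,0,1,&iscol); CHKERRQ(ierr);
       ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local); CHKERRQ(ierr);
       /* ... use local[0], a SeqAIJ matrix with rend-rstart rows ... */
       ierr = MatDestroyMatrices(1,&local); CHKERRQ(ierr);
       ierr = ISDestroy(isrow); CHKERRQ(ierr);
       ierr = ISDestroy(iscol); CHKERRQ(ierr);
*/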