#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpidense.c,v 1.115 1999/06/08 22:55:41 balay Exp balay $";
#endif

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

extern int MatSetUpMultiply_MPIDense(Mat);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = A->rstart, rend = A->rend, row;
  int          roworiented = A->roworiented;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for ( j=0; j<n; j++ ) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, i, j, rstart = mdn->rstart, rend = mdn->rend, row;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= mdn->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= mdn->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,Scalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,Scalar **array)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
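
/*
   Example (illustrative sketch only, not compiled here): MatGetArray() on an
   MPIDense matrix exposes the local block in column-major (Fortran) order
   with leading dimension equal to the number of local rows, so the local
   entry (i,j) is array[i + j*m]; this is the same layout the loader below
   relies on.  The matrix "mat" is assumed to be an assembled MATMPIDENSE
   matrix; it is not defined in this file.

       Scalar *array;
       int    ierr,i,j,m,N;

       ierr = MatGetLocalSize(mat,&m,&N);CHKERRQ(ierr);
       ierr = MatGetArray(mat,&array);CHKERRQ(ierr);
       for ( j=0; j<N; j++ ) {
         for ( i=0; i<m; i++ ) {
           array[i + j*m] *= 2.0;
         }
       }
       ierr = MatRestoreArray(mat,&array);CHKERRQ(ierr);
*/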

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",
           nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  Scalar       *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for ( i=0; i<n; ) {
      /* Now identify the consecutive vals belonging to the same row */
      for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
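
/*
   Example (illustrative sketch only, not compiled here): the usual set-values /
   assembly cycle from application code.  Entries for rows owned by other
   processors are cached in the stash by MatSetValues() and communicated by the
   assembly routines above.  The matrix "mat" and the index/value variables are
   assumptions for the example.

       int    row = 0, col = 0, ierr;
       Scalar value = 1.0;

       ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/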

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,Scalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense *) A->data;
  int          i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int          *procs,*nprocs,j,found,idx,nsends,*work;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
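
/*
   Example (illustrative sketch only, not compiled here): zeroing a set of
   global rows and placing a value on their diagonals.  As the note preceding
   MatZeroRows_MPIDense() explains, the diagonal is handled correctly only when
   the matrix is square and the row and column ownerships match.  The matrix
   "mat" and the row list are assumptions for the example.

       IS     is;
       int    rows[2],ierr;
       Scalar one = 1.0;

       rows[0] = 0; rows[1] = 5;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(mat,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/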

#undef __FUNC__
#define __FUNC__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIDense"
int MatMultTrans_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;
  Scalar       zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIDense"
int MatMultTransAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTrans_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense *) a->A->data;
  int          ierr, len, i, n, m = a->m, radd;
  Scalar       *x, zero = 0.0;

  PetscFunctionBegin;
  VecSet(&zero,v);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != a->M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming mat and vec");
  len  = PetscMin(aloc->m,aloc->n);
  radd = a->rstart*m;
  for ( i=0; i<len; i++ ) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mdn->M,mdn->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  PetscFree(mdn->rowners);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   PetscFree(mdn->factor->temp);
    if (mdn->factor->tag)    PetscFree(mdn->factor->tag);
    if (mdn->factor->pivots) PetscFree(mdn->factor->pivots);
    PetscFree(mdn->factor);
  }
  PetscFree(mdn);
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense_ASCII"
static int MatView_MPIDense_ASCII(Mat mat,Viewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr, format, size = mdn->size, rank = mdn->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
  ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
  if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
    MatInfo info;
    ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mdn->m,
            (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
    ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (format == VIEWER_FORMAT_ASCII_INFO) {
    PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    int          M = mdn->M, N = mdn->N,m,row,i, nz, *cols;
    Scalar       *vals;
    Mat_SeqDense *Amdn = (Mat_SeqDense*) mdn->A->data;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = Amdn->m;
    for ( i=0; i<m; i++ ) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = MatView_MPIDense_ASCII(mat,viewer);CHKERRQ(ierr);
  } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  Mat          mdn = mat->A;
  int          ierr;
  double       isend[5], irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* extern int MatLUFactorSymbolic_MPIDense(Mat,IS,IS,double,Mat*);
   extern int MatLUFactorNumeric_MPIDense(Mat,Mat*);
   extern int MatLUFactor_MPIDense(Mat,IS,IS,double);
   extern int MatSolve_MPIDense(Mat,Vec,Vec);
   extern int MatSolveAdd_MPIDense(Mat,Vec,Vec,Vec);
   extern int MatSolveTrans_MPIDense(Mat,Vec,Vec);
   extern int MatSolveTransAdd_MPIDense(Mat,Vec,Vec,Vec); */

#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_NEW_NONZERO_LOCATION_ERR ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_COLUMNS_UNSORTED) {
    MatSetOption(a->A,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) {
    PLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0; MatSetOption(a->A,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS) {
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIDense"
int MatGetSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->M; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIDense"
int MatGetLocalSize_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->m; *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIDense"
int MatGetOwnershipRange_MPIDense(Mat A,int *m,int *n)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense *) A->data;
  int          lrow, rstart = mat->rstart, rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,0,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  PetscFunctionBegin;
  if (idx) PetscFree(*idx);
  if (v)   PetscFree(*v);
  PetscFunctionReturn(0);
}
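
/*
   Example (illustrative sketch only, not compiled here): traversing the locally
   owned rows of a parallel dense matrix, the same pattern used by
   MatView_MPIDense_ASCII() above.  Only rows in [rstart,rend) may be requested
   with MatGetRow().  The matrix "mat" is an assumption for the example.

       int    row,rstart,rend,nz,*cols,ierr;
       Scalar *vals;

       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/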

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,double *norm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*) mdn->A->data;
  int          ierr, i, j;
  double       sum = 0.0;
  Scalar       *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mat->n*mat->m; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
      PLogFlops(2*mat->n*mat->m);
    } else if (type == NORM_1) {
      double *tmp, *tmp2;
      tmp  = (double *) PetscMalloc( 2*mdn->N*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = tmp + mdn->N;
      ierr = PetscMemzero(tmp,2*mdn->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = mat->v;
      for ( j=0; j<mat->n; j++ ) {
        for ( i=0; i<mat->m; i++ ) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,mdn->N,MPI_DOUBLE,MPI_SUM,A->comm);CHKERRQ(ierr);
      for ( j=0; j<mdn->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp);
      PLogFlops(mat->n*mat->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense *) A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense *) a->A->data;
  Mat          B;
  int          M = a->M, N = a->N, m, n, *rwork, rstart = a->rstart;
  int          j, i, ierr;
  Scalar       *v;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_SUP,0,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = Aloc->m; n = Aloc->n; v = Aloc->v;
  /* rwork holds the m global row indices of A used as column indices of B */
  rwork = (int *) PetscMalloc((m+1)*sizeof(int));CHKPTRQ(rwork);
  for ( j=0; j<n; j++ ) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  PetscFree(rwork);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps *Abops;
    MatOps   Aops;

    /* This isn't really an in-place transpose, but we free the data structures of a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else over from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;

    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#include "pinclude/blaslapack.h"
#undef __FUNC__
#define __FUNC__ "MatScale_MPIDense"
int MatScale_MPIDense(Scalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense *) inA->data;
  Mat_SeqDense *a = (Mat_SeqDense *) A->A->data;
  int          one = 1, nz;

  PetscFunctionBegin;
  nz = a->m*a->n;
  BLscal_( &nz, alpha, a->v, &one );
  PLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
       MatMultAdd_MPIDense,
       MatMultTrans_MPIDense,
       MatMultTransAdd_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
       MatGetInfo_MPIDense,0,
       MatGetDiagonal_MPIDense,
       0,
       MatNorm_MPIDense,
       MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
       MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
       MatGetSize_MPIDense,
       MatGetLocalSize_MPIDense,
       MatGetOwnershipRange_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
       MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
       0,
       MatScale_MPIDense,
       0,
       0,
       0,
       MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatGetMaps_Petsc};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Currently, the only parallel dense matrix decomposition is by rows,
   so that n=N and each submatrix owns all of the global columns.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,Scalar *data,Mat *A)
{
  Mat          mat;
  Mat_MPIDense *a;
  int          ierr, i,flg;

  PetscFunctionBegin;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  *A = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);

  /*
     The computation of n below is wrong: n should represent the number of
     local rows in the right-hand (column) vector.
  */

  /* each row stores all columns */
  if (N == PETSC_DECIDE) N = n;
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  /* if (n != N) SETERRQ(PETSC_ERR_SUP,0,"For now, only n=N is supported"); */
  a->N = mat->N = N;
  a->M = mat->M = M;
  a->m = mat->m = m;
  a->n = mat->n = n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&mat->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,PETSC_DECIDE,N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  a->cowners = a->rowners + a->size + 1;
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }

  ierr = MatCreateSeqDense(PETSC_COMM_SELF,m,N,data,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);

  /* build cache for off array entries formed */
  a->donotstash = 0;
  ierr = MatStashCreate_Private(comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = 1;

  *A = mat;
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
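
/*
   Example (illustrative sketch only, not compiled here): creating a 100 x 100
   parallel dense matrix with PETSc-controlled storage, letting PETSc choose
   the local row distribution.  The global sizes are assumptions for the
   example.

       Mat A;
       int ierr;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                                100,100,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatDestroy(A);CHKERRQ(ierr);
*/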

#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense *) A->data;
  int          ierr;
  FactorCtx    *factor;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIDENSE,"Mat",A->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIDense));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIDense;
  mat->ops->view    = MatView_MPIDense;
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;
  if (oldmat->factor) {
    a->factor = (FactorCtx *) (factor = PetscNew(FactorCtx));CHKPTRQ(factor);
    /* copy factor contents ... add this code! */
  } else a->factor = 0;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;
  a->rowners      = (int *) PetscMalloc((a->size+1)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M, int N, Mat *newmat)
{
  int        *rowners, i,size,rank,m,ierr,nz,j;
  Scalar     *array,*vals,*vals_ptr;
  MPI_Status status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    vals = (Scalar *) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( m*N*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for ( i=0; i<m; i++ ) {
      for ( j=0; j<N; j++ ) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  PetscFree(rowners);
  PetscFree(vals);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIDense"
int MatLoad_MPIDense(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i, nz, ierr, j,rstart, rend, fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
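
/*
   Example (illustrative sketch only, not compiled here): loading a parallel
   dense matrix from a PETSc binary file, which MatLoad() dispatches to the
   MatLoad_MPIDense() routine above for MATMPIDENSE.  The file name and the
   binary-viewer creation call are assumptions for the example and may differ
   between PETSc versions.

       Viewer viewer;
       Mat    A;
       int    ierr;

       ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
       ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/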