/*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  int          m = A->m,rstart = mdn->rstart,ierr;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
int MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
int MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
int MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK. ... Currently only submatrices with the same partitioning as the
     original matrix are supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
int MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

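/*
   A minimal sketch of the assembly cycle these routines support (illustrative only;
   grow, gcol and value are placeholders chosen by the caller).  Entries for rows owned
   by other processes are stashed by MatSetValues_MPIDense() and communicated during the
   MatAssemblyBegin()/MatAssemblyEnd() pair:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
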
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
int MatZeroEntries_MPIDense(Mat A)
{
  int          ierr;
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIDense"
int MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
int MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  int          i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int          *nprocs,j,idx,nsends;
  int          nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int          *rvalues,tag = A->tag,count,base,slen,n,*source;
  int          *lens,imdex,*lrows,*values;
  MPI_Comm     comm = A->comm;
  MPI_Request  *send_waits,*recv_waits;
  MPI_Status   recv_status,*send_status;
  IS           istmp;
  PetscTruth   found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  ierr = ISDestroy(istmp);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
int MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  int          ierr,len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
int MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  int               ierr,size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
                                                (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    int         M = mat->M,N = mat->N,m,row,i,nz,*cols;
    PetscScalar *vals;

    if (!rank) {
      ierr = MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
int MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (isascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  Mat          mdn  = mat->A;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
int MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,idx,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  int ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar  *l,*r,x,*v;
  int          ierr,i,j,s2a,s3a,s2,s3,m = mdn->A->m,n = mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  if (rr) {
    ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArrayFast(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArrayFast(mdn->lvec,&r);CHKERRQ(ierr);
    PetscLogFlops(n*m);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  int          ierr,i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
int MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat          B;
  int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  int          j,i,ierr;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  ierr = MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  /* rwork holds the m global row indices owned by this process */
  ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr);
  for (j=0; j<n; j++) {
    for (i=0; i<m; i++) rwork[i] = rstart + i;
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
int MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
  int          one = 1,nz;

  PetscFunctionBegin;
  nz = inA->m*inA->N;
  BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
  PetscLogFlops(nz);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
int MatSetUpPreallocation_MPIDense(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIDense,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIDense};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
int MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a;
  int          ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

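/*
   A minimal usage sketch for the MATMPIDENSE type (illustrative only, not part of this
   file's interface; the global sizes M and N are placeholders chosen by the caller):

     Mat A;
     int ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
     ... set entries with MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
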
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
int MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  int          ierr,i;

  PetscFunctionBegin;
  ierr      = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

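/*
   A minimal sketch of how MATDENSE resolves to a concrete type (illustrative only;
   M and N are placeholders chosen by the caller):

     Mat A;
     int ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);

   This becomes MATSEQDENSE on a single-process communicator and MATMPIDENSE otherwise;
   -mat_type dense together with MatSetFromOptions() selects the same behavior.
*/
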
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
int MatCreate_Dense(Mat A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  int ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

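/*
   A minimal calling sketch for MatCreateMPIDense() (illustrative only; nlocal and N are
   placeholders chosen by the caller).  When data is not PETSC_NULL it must hold the
   local block created by MatMPIDenseSetPreallocation_MPIDense() above, i.e. nlocal*N
   scalars stored by columns (Fortran ordering):

     Mat A;
     int ierr,nlocal,N;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,nlocal,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/
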
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
  int          ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
{
  int         *rowners,i,size,rank,m,ierr,nz,j;
  PetscScalar *array,*vals,*vals_ptr;
  MPI_Status  status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
int MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;
  int          i,nz,ierr,j,rstart,rend,fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
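
/*
   A minimal sketch of loading a parallel dense matrix through MatLoad() (illustrative
   only; it assumes a binary PetscViewer named viewer has already been opened on the
   input file elsewhere):

     Mat A;
     int ierr;
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);

   The file may store the matrix either row by row in the usual PETSc binary format or
   as a dense block (MATRIX_BINARY_FORMAT_DENSE); both cases are handled above.
*/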