/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
        matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
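
/*
   Example (an illustrative sketch, not part of the library source): a typical
   use of MatDenseGetLocalMatrix() is to apply a sequential-only operation to
   the local block of a parallel dense matrix. A is a placeholder for an
   existing MATSEQDENSE or MATMPIDENSE matrix; no reference is taken, so the
   caller must not destroy the inner matrix.

     Mat Aloc;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);
*/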
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
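
/*
   Example (sketch; variable names are placeholders): the routines above
   forward array access to the local MATSEQDENSE block, so each process sees
   its own rows and all global columns, stored column by column with leading
   dimension lda. A caller can therefore fill the local block directly:

     PetscScalar *a;
     PetscInt    lda,m,N,i,j;

     ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = MatDenseGetArray(A,&a);CHKERRQ(ierr);
     for (j=0; j<N; j++) {
       for (i=0; i<m; i++) a[i + j*lda] = 0.0;
     }
     ierr = MatDenseRestoreArray(A,&a);CHKERRQ(ierr);
*/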
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
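
/*
   Example (sketch): values destined for rows owned by another process are
   cached in mat->stash by MatSetValues_MPIDense() and communicated by the
   two assembly routines above, so the generic assembly idiom works
   unchanged; grow, gcol and val are placeholders, and grow may be a row
   owned by another process.

     ierr = MatSetValues(A,1,&grow,1,&gcol,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Setting MAT_IGNORE_OFF_PROC_ENTRIES with MatSetOption() disables the stash.
*/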
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
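
/*
   The four multiply routines above share one communication pattern: for
   y = A*x the input is scattered into the work vector lvec and the product
   is formed by the sequential kernel, while for y = A'*x the local product
   is formed first and then accumulated with a reverse scatter and
   ADD_VALUES. A caller sees only the usual interface (sketch; the vectors
   are assumed compatible with A):

     ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/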
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
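
/*
   Example (sketch): as the error message above indicates, storing a parallel
   dense matrix in binary requires the native format; "matrix.dat" is a
   placeholder file name.

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/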
extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
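
/*
   Example (sketch): MAT_ROW_ORIENTED controls how MatSetValues_MPIDense()
   interprets the values array. With the option set to PETSC_FALSE the m*n
   values are read by columns, which matches the native dense storage; rows,
   cols and vals are placeholders supplied by the caller.

     ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatSetValues(A,m,rows,n,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/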
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
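
/*
   Example (sketch): MatGetColumnNorms_MPIDense() above reduces the local
   column norms across the communicator (squaring before the reduction and
   taking the square root afterwards for NORM_2), so the caller just supplies
   an array whose length is the global number of columns.

     PetscReal *norms;
     PetscInt  N;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/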
extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};
preallocated"); 1283 mat->preallocated = PETSC_TRUE; 1284 /* Note: For now, when data is specified above, this assumes the user correctly 1285 allocates the local dense storage space. We should add error checking. */ 1286 1287 a = (Mat_MPIDense*)mat->data; 1288 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1289 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1290 a->nvec = mat->cmap->n; 1291 1292 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1293 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1294 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1295 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1296 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #if defined(PETSC_HAVE_ELEMENTAL) 1301 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1302 { 1303 Mat mat_elemental; 1304 PetscErrorCode ierr; 1305 PetscScalar *v; 1306 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1307 1308 PetscFunctionBegin; 1309 if (reuse == MAT_REUSE_MATRIX) { 1310 mat_elemental = *newmat; 1311 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1312 } else { 1313 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1314 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1315 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1316 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1317 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1318 } 1319 1320 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1321 for (i=0; i<N; i++) cols[i] = i; 1322 for (i=0; i<m; i++) rows[i] = rstart + i; 1323 1324 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1325 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1326 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1327 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1328 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1329 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1330 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1331 1332 if (reuse == MAT_INPLACE_MATRIX) { 1333 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1334 } else { 1335 *newmat = mat_elemental; 1336 } 1337 PetscFunctionReturn(0); 1338 } 1339 #endif 1340 1341 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1342 { 1343 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1344 PetscErrorCode ierr; 1345 1346 PetscFunctionBegin; 1347 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1348 PetscFunctionReturn(0); 1349 } 1350 1351 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1352 { 1353 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1358 PetscFunctionReturn(0); 1359 } 1360 1361 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1362 { 1363 PetscErrorCode ierr; 1364 Mat_MPIDense *mat; 1365 PetscInt m,nloc,N; 1366 1367 PetscFunctionBegin; 1368 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1369 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1370 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
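
/*
   Example (sketch): the usual creation sequence for a parallel dense matrix,
   letting PETSc allocate the storage (data = NULL); comm, m, n, M and N are
   placeholders. MatCreateDense() below wraps the same calls and also handles
   the single-process case.

     Mat A;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/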
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array that it had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
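
/*
   Example (sketch): the place/reset pair above lets an application
   temporarily substitute its own column-major storage for the local block,
   avoiding a copy; myarray is a placeholder allocated by the caller and must
   stay valid until MatDenseResetArray() is called.

     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/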
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts belonging to the other processes and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
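
/*
  Worked example of the insertion loops above (added note, not original code):
  the file stores each local block by rows while the dense array is column
  major, so with m = 2 local rows and N = 3 columns the file order

    a00 a01 a02 a10 a11 a12

  lands in array[i + j*m], i.e. in memory order

    a00 a10 a01 a11 a02 a12,

  which is why the values cannot be read from disk directly into the array.
*/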
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscBool      isbinary;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);

  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
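
/*
  Illustrative sketch (not part of the library): loading a dense matrix from a
  PETSc binary file; file below is a placeholder for the file name.

    Mat         A;
    PetscViewer viewer;
    ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
    ierr = MatLoad(A,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/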
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray contributions into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
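
/*
  Note on the algorithm above (added commentary): with the row partition
  A = [A_0; ...; A_{p-1}] and B = [B_0; ...; B_{p-1}] induced by the matching
  ownership ranges,

    C = A^T B = sum_k A_k^T B_k,

  so each process forms its full-size partial product with a single BLAS gemm,
  packs it by destination owner, and one MPI_Reduce_scatter() both sums the
  partial products and delivers to each process exactly the rows of C it owns.
*/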
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
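
/*
  Illustrative sketch (not part of the library): forming C = A^T B for two
  MPIDENSE matrices with identical row layouts, then reusing the product data
  structures after the values of A or B change.

    Mat C;
    ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
    ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
    ierr = MatDestroy(&C);CHKERRQ(ierr);
*/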
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i,size;
  PetscInt              maxRows,bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg,nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg  = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)Cdense,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic: two buffers, each sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv: send buffer for the local block of B^T, receive buffer for all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c                    = (Mat_MPIDense*)Cdense->data;
  c->abtdense          = abt;
  abt->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,sendsiz,recvsiz,sendto,recvfrom,recvisfrom;
  PetscScalar           *sendbuf,*recvbuf=0,*carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm,cn,ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving into and sending from the buffers, so they can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr    = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr    = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr   = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf,*recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm,cn,ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A,B,C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A,B,C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
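
/*
  Illustrative sketch (not part of the library): forming C = A B^T; the
  communication algorithm is chosen at symbolic time from the options database.

    Mat C;
    ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
    ierr = MatDestroy(&C);CHKERRQ(ierr);

  Run with -matmattransmult_mpidense_mpidense_via cyclic to select the pairwise
  cyclic variant instead of the default allgatherv.
*/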
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reuse of Cdense */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
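
/*
  Illustrative sketch (not part of the library): C = A B for two MPIDENSE
  matrices is routed through Elemental above, so this path exists only when
  PETSc is configured with Elemental support (PETSC_HAVE_ELEMENTAL, e.g. via
  --download-elemental).

    Mat C;
    ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
    ierr = MatDestroy(&C);CHKERRQ(ierr);
*/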