1*c4762a1bSJed Brown 2*c4762a1bSJed Brown static char help[] = "Tests LU, Cholesky factorization and MatMatSolve() for a sequential dense matrix. \n\ 3*c4762a1bSJed Brown For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ 4*c4762a1bSJed Brown For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; 5*c4762a1bSJed Brown 6*c4762a1bSJed Brown #include <petscmat.h> 7*c4762a1bSJed Brown 8*c4762a1bSJed Brown int main(int argc,char **argv) 9*c4762a1bSJed Brown { 10*c4762a1bSJed Brown Mat mat,F,RHS,SOLU; 11*c4762a1bSJed Brown MatInfo info; 12*c4762a1bSJed Brown PetscErrorCode ierr; 13*c4762a1bSJed Brown PetscInt n = 10,i,j,rstart,rend,nrhs=2; 14*c4762a1bSJed Brown PetscScalar value = 1.0; 15*c4762a1bSJed Brown Vec x,y,b,ytmp; 16*c4762a1bSJed Brown IS perm; 17*c4762a1bSJed Brown PetscReal norm,tol=PETSC_SMALL; 18*c4762a1bSJed Brown PetscMPIInt size; 19*c4762a1bSJed Brown PetscRandom rand; 20*c4762a1bSJed Brown char solver[64]; 21*c4762a1bSJed Brown PetscBool inplace,full = PETSC_FALSE, ldl = PETSC_TRUE; 22*c4762a1bSJed Brown 23*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 24*c4762a1bSJed Brown ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 25*c4762a1bSJed Brown if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); 26*c4762a1bSJed Brown ierr = PetscStrcpy(solver,"petsc");CHKERRQ(ierr); 27*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 28*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); 29*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL);CHKERRQ(ierr); 30*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL);CHKERRQ(ierr); 31*c4762a1bSJed Brown ierr = PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL);CHKERRQ(ierr); 32*c4762a1bSJed Brown 33*c4762a1bSJed Brown /* create multiple vectors RHS and SOLU */ 34*c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&RHS);CHKERRQ(ierr); 35*c4762a1bSJed Brown ierr = MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,n,nrhs);CHKERRQ(ierr); 36*c4762a1bSJed Brown ierr = MatSetType(RHS,MATDENSE);CHKERRQ(ierr); 37*c4762a1bSJed Brown ierr = MatSetOptionsPrefix(RHS,"rhs_");CHKERRQ(ierr); 38*c4762a1bSJed Brown ierr = MatSetFromOptions(RHS);CHKERRQ(ierr); 39*c4762a1bSJed Brown ierr = MatSeqDenseSetPreallocation(RHS,NULL);CHKERRQ(ierr); 40*c4762a1bSJed Brown 41*c4762a1bSJed Brown ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); 42*c4762a1bSJed Brown ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); 43*c4762a1bSJed Brown ierr = MatSetRandom(RHS,rand);CHKERRQ(ierr); 44*c4762a1bSJed Brown 45*c4762a1bSJed Brown ierr = MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU);CHKERRQ(ierr); 46*c4762a1bSJed Brown 47*c4762a1bSJed Brown /* create matrix */ 48*c4762a1bSJed Brown ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr); 49*c4762a1bSJed Brown ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); 50*c4762a1bSJed Brown ierr = MatSetType(mat,MATDENSE);CHKERRQ(ierr); 51*c4762a1bSJed Brown ierr = MatSetFromOptions(mat);CHKERRQ(ierr); 52*c4762a1bSJed Brown ierr = MatSetUp(mat);CHKERRQ(ierr); 53*c4762a1bSJed Brown ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr); 54*c4762a1bSJed Brown if (!full) { 55*c4762a1bSJed Brown for (i=rstart; i<rend; i++) { 56*c4762a1bSJed Brown value = (PetscReal)i+1; 57*c4762a1bSJed Brown ierr = MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); 58*c4762a1bSJed Brown } 59*c4762a1bSJed Brown ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 60*c4762a1bSJed Brown ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 61*c4762a1bSJed Brown } else { 62*c4762a1bSJed Brown Mat T; 63*c4762a1bSJed Brown 64*c4762a1bSJed Brown ierr = MatSetRandom(mat,rand);CHKERRQ(ierr); 65*c4762a1bSJed Brown ierr = MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 66*c4762a1bSJed Brown ierr = MatDestroy(&mat);CHKERRQ(ierr); 67*c4762a1bSJed Brown mat = T; 68*c4762a1bSJed Brown } 69*c4762a1bSJed Brown 70*c4762a1bSJed Brown /* create single vectors */ 71*c4762a1bSJed Brown ierr = MatCreateVecs(mat,&x,&b);CHKERRQ(ierr); 72*c4762a1bSJed Brown ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 73*c4762a1bSJed Brown ierr = VecDuplicate(y,&ytmp);CHKERRQ(ierr); 74*c4762a1bSJed Brown ierr = VecSet(x,value);CHKERRQ(ierr); 75*c4762a1bSJed Brown 76*c4762a1bSJed Brown /* Only SeqDense* support in-place factorizations and NULL permutations */ 77*c4762a1bSJed Brown ierr = PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace);CHKERRQ(ierr); 78*c4762a1bSJed Brown ierr = MatGetLocalSize(mat,&i,NULL);CHKERRQ(ierr); 79*c4762a1bSJed Brown ierr = MatGetOwnershipRange(mat,&j,NULL);CHKERRQ(ierr); 80*c4762a1bSJed Brown ierr = ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm);CHKERRQ(ierr); 81*c4762a1bSJed Brown 82*c4762a1bSJed Brown ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 83*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %D, allocated nonzeros = %D\n", 84*c4762a1bSJed Brown (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr); 85*c4762a1bSJed Brown ierr = MatMult(mat,x,b);CHKERRQ(ierr); 86*c4762a1bSJed Brown 87*c4762a1bSJed Brown /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ 88*c4762a1bSJed Brown /* in-place Cholesky */ 89*c4762a1bSJed Brown if (inplace) { 90*c4762a1bSJed Brown Mat RHS2; 91*c4762a1bSJed Brown 92*c4762a1bSJed Brown ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); 93*c4762a1bSJed Brown if (!ldl) { ierr = MatSetOption(F,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } 94*c4762a1bSJed Brown ierr = MatCholeskyFactor(F,perm,0);CHKERRQ(ierr); 95*c4762a1bSJed Brown ierr = MatSolve(F,b,y);CHKERRQ(ierr); 96*c4762a1bSJed Brown ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 97*c4762a1bSJed Brown ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 98*c4762a1bSJed Brown if (norm > tol) { 99*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm);CHKERRQ(ierr); 100*c4762a1bSJed Brown } 101*c4762a1bSJed Brown 102*c4762a1bSJed Brown ierr = MatMatSolve(F,RHS,SOLU);CHKERRQ(ierr); 103*c4762a1bSJed Brown ierr = MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);CHKERRQ(ierr); 104*c4762a1bSJed Brown ierr = MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 105*c4762a1bSJed Brown ierr = MatNorm(RHS,NORM_FROBENIUS,&norm);CHKERRQ(ierr); 106*c4762a1bSJed Brown if (norm > tol) { 107*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm);CHKERRQ(ierr); 108*c4762a1bSJed Brown } 109*c4762a1bSJed Brown ierr = MatDestroy(&F);CHKERRQ(ierr); 110*c4762a1bSJed Brown ierr = MatDestroy(&RHS2);CHKERRQ(ierr); 111*c4762a1bSJed Brown } 112*c4762a1bSJed Brown 113*c4762a1bSJed Brown /* out-of-place Cholesky */ 114*c4762a1bSJed Brown ierr = MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); 115*c4762a1bSJed Brown if (!ldl) { ierr = MatSetOption(F,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } 116*c4762a1bSJed Brown ierr = MatCholeskyFactorSymbolic(F,mat,perm,0);CHKERRQ(ierr); 117*c4762a1bSJed Brown ierr = MatCholeskyFactorNumeric(F,mat,0);CHKERRQ(ierr); 118*c4762a1bSJed Brown ierr = MatSolve(F,b,y);CHKERRQ(ierr); 119*c4762a1bSJed Brown ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 120*c4762a1bSJed Brown ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 121*c4762a1bSJed Brown if (norm > tol) { 122*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm);CHKERRQ(ierr); 123*c4762a1bSJed Brown } 124*c4762a1bSJed Brown ierr = MatDestroy(&F);CHKERRQ(ierr); 125*c4762a1bSJed Brown 126*c4762a1bSJed Brown /* LU factorization - perms and factinfo are ignored by LAPACK */ 127*c4762a1bSJed Brown i = n-1; 128*c4762a1bSJed Brown ierr = MatZeroRows(mat,1,&i,-1.0,NULL,NULL);CHKERRQ(ierr); 129*c4762a1bSJed Brown ierr = MatMult(mat,x,b);CHKERRQ(ierr); 130*c4762a1bSJed Brown 131*c4762a1bSJed Brown /* in-place LU */ 132*c4762a1bSJed Brown if (inplace) { 133*c4762a1bSJed Brown Mat RHS2; 134*c4762a1bSJed Brown 135*c4762a1bSJed Brown ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); 136*c4762a1bSJed Brown ierr = MatLUFactor(F,perm,perm,0);CHKERRQ(ierr); 137*c4762a1bSJed Brown ierr = MatSolve(F,b,y);CHKERRQ(ierr); 138*c4762a1bSJed Brown ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 139*c4762a1bSJed Brown ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 140*c4762a1bSJed Brown if (norm > tol) { 141*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm);CHKERRQ(ierr); 142*c4762a1bSJed Brown } 143*c4762a1bSJed Brown ierr = MatMatSolve(F,RHS,SOLU);CHKERRQ(ierr); 144*c4762a1bSJed Brown ierr = MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);CHKERRQ(ierr); 145*c4762a1bSJed Brown ierr = MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 146*c4762a1bSJed Brown ierr = MatNorm(RHS,NORM_FROBENIUS,&norm);CHKERRQ(ierr); 147*c4762a1bSJed Brown if (norm > tol) { 148*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm);CHKERRQ(ierr); 149*c4762a1bSJed Brown } 150*c4762a1bSJed Brown ierr = MatDestroy(&F);CHKERRQ(ierr); 151*c4762a1bSJed Brown ierr = MatDestroy(&RHS2);CHKERRQ(ierr); 152*c4762a1bSJed Brown } 153*c4762a1bSJed Brown 154*c4762a1bSJed Brown /* out-of-place LU */ 155*c4762a1bSJed Brown ierr = MatGetFactor(mat,solver,MAT_FACTOR_LU,&F);CHKERRQ(ierr); 156*c4762a1bSJed Brown ierr = MatLUFactorSymbolic(F,mat,perm,perm,0);CHKERRQ(ierr); 157*c4762a1bSJed Brown ierr = MatLUFactorNumeric(F,mat,0);CHKERRQ(ierr); 158*c4762a1bSJed Brown ierr = MatSolve(F,b,y);CHKERRQ(ierr); 159*c4762a1bSJed Brown ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 160*c4762a1bSJed Brown ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 161*c4762a1bSJed Brown if (norm > tol) { 162*c4762a1bSJed Brown ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm);CHKERRQ(ierr); 163*c4762a1bSJed Brown } 164*c4762a1bSJed Brown 165*c4762a1bSJed Brown /* free space */ 166*c4762a1bSJed Brown ierr = ISDestroy(&perm);CHKERRQ(ierr); 167*c4762a1bSJed Brown ierr = MatDestroy(&F);CHKERRQ(ierr); 168*c4762a1bSJed Brown ierr = MatDestroy(&mat);CHKERRQ(ierr); 169*c4762a1bSJed Brown ierr = MatDestroy(&RHS);CHKERRQ(ierr); 170*c4762a1bSJed Brown ierr = MatDestroy(&SOLU);CHKERRQ(ierr); 171*c4762a1bSJed Brown ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); 172*c4762a1bSJed Brown ierr = VecDestroy(&x);CHKERRQ(ierr); 173*c4762a1bSJed Brown ierr = VecDestroy(&b);CHKERRQ(ierr); 174*c4762a1bSJed Brown ierr = VecDestroy(&y);CHKERRQ(ierr); 175*c4762a1bSJed Brown ierr = VecDestroy(&ytmp);CHKERRQ(ierr); 176*c4762a1bSJed Brown ierr = PetscFinalize(); 177*c4762a1bSJed Brown return ierr; 178*c4762a1bSJed Brown } 179*c4762a1bSJed Brown 180*c4762a1bSJed Brown 181*c4762a1bSJed Brown 182*c4762a1bSJed Brown /*TEST 183*c4762a1bSJed Brown 184*c4762a1bSJed Brown test: 185*c4762a1bSJed Brown 186*c4762a1bSJed Brown test: 187*c4762a1bSJed Brown requires: cuda 188*c4762a1bSJed Brown suffix: seqdensecuda 189*c4762a1bSJed Brown args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} 190*c4762a1bSJed Brown output_file: output/ex1_1.out 191*c4762a1bSJed Brown 192*c4762a1bSJed Brown test: 193*c4762a1bSJed Brown requires: cuda 194*c4762a1bSJed Brown suffix: seqdensecuda_seqaijcusparse 195*c4762a1bSJed Brown args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda 196*c4762a1bSJed Brown output_file: output/ex1_2.out 197*c4762a1bSJed Brown 198*c4762a1bSJed Brown test: 199*c4762a1bSJed Brown requires: cuda viennacl 200*c4762a1bSJed Brown suffix: seqdensecuda_seqaijviennacl 201*c4762a1bSJed Brown args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda 202*c4762a1bSJed Brown output_file: output/ex1_2.out 203*c4762a1bSJed Brown 204*c4762a1bSJed Brown TEST*/ 205