1*c4762a1bSJed Brown 2*c4762a1bSJed Brown /* 3*c4762a1bSJed Brown Include "petscsnes.h" so that we can use SNES solvers. Note that this 4*c4762a1bSJed Brown file automatically includes: 5*c4762a1bSJed Brown petscsys.h - base PETSc routines petscvec.h - vectors 6*c4762a1bSJed Brown petscmat.h - matrices 7*c4762a1bSJed Brown petscis.h - index sets petscksp.h - Krylov subspace methods 8*c4762a1bSJed Brown petscviewer.h - viewers petscpc.h - preconditioners 9*c4762a1bSJed Brown petscksp.h - linear solvers 10*c4762a1bSJed Brown */ 11*c4762a1bSJed Brown #include <petscsnes.h> 12*c4762a1bSJed Brown #include <petscao.h> 13*c4762a1bSJed Brown 14*c4762a1bSJed Brown static char help[] = "An Unstructured Grid Example.\n\ 15*c4762a1bSJed Brown This example demonstrates how to solve a nonlinear system in parallel\n\ 16*c4762a1bSJed Brown with SNES for an unstructured mesh. The mesh and partitioning information\n\ 17*c4762a1bSJed Brown is read in an application defined ordering,which is later transformed\n\ 18*c4762a1bSJed Brown into another convenient ordering (called the local ordering). The local\n\ 19*c4762a1bSJed Brown ordering, apart from being efficient on cpu cycles and memory, allows\n\ 20*c4762a1bSJed Brown the use of the SPMD model of parallel programming. After partitioning\n\ 21*c4762a1bSJed Brown is done, scatters are created between local (sequential)and global\n\ 22*c4762a1bSJed Brown (distributed) vectors. Finally, we set up the nonlinear solver context\n\ 23*c4762a1bSJed Brown in the usual way as a structured grid (see\n\ 24*c4762a1bSJed Brown petsc/src/snes/tutorials/ex5.c).\n\ 25*c4762a1bSJed Brown This example also illustrates the use of parallel matrix coloring.\n\ 26*c4762a1bSJed Brown The command line options include:\n\ 27*c4762a1bSJed Brown -vert <Nv>, where Nv is the global number of nodes\n\ 28*c4762a1bSJed Brown -elem <Ne>, where Ne is the global number of elements\n\ 29*c4762a1bSJed Brown -nl_par <lambda>, where lambda is the multiplier for the non linear term (u*u) term\n\ 30*c4762a1bSJed Brown -lin_par <alpha>, where alpha is the multiplier for the linear term (u)\n\ 31*c4762a1bSJed Brown -fd_jacobian_coloring -mat_coloring_type lf\n"; 32*c4762a1bSJed Brown 33*c4762a1bSJed Brown /*T 34*c4762a1bSJed Brown Concepts: SNES^unstructured grid 35*c4762a1bSJed Brown Concepts: AO^application to PETSc ordering or vice versa; 36*c4762a1bSJed Brown Concepts: VecScatter^using vector scatter operations; 37*c4762a1bSJed Brown Processors: n 38*c4762a1bSJed Brown T*/ 39*c4762a1bSJed Brown 40*c4762a1bSJed Brown 41*c4762a1bSJed Brown 42*c4762a1bSJed Brown /* ------------------------------------------------------------------------ 43*c4762a1bSJed Brown 44*c4762a1bSJed Brown PDE Solved : L(u) + lambda*u*u + alpha*u = 0 where L(u) is the Laplacian. 45*c4762a1bSJed Brown 46*c4762a1bSJed Brown The Laplacian is approximated in the following way: each edge is given a weight 47*c4762a1bSJed Brown of one meaning that the diagonal term will have the weight equal to the degree 48*c4762a1bSJed Brown of a node. The off diagonal terms will get a weight of -1. 49*c4762a1bSJed Brown 50*c4762a1bSJed Brown -----------------------------------------------------------------------*/ 51*c4762a1bSJed Brown 52*c4762a1bSJed Brown 53*c4762a1bSJed Brown #define MAX_ELEM 500 /* Maximum number of elements */ 54*c4762a1bSJed Brown #define MAX_VERT 100 /* Maximum number of vertices */ 55*c4762a1bSJed Brown #define MAX_VERT_ELEM 3 /* Vertices per element */ 56*c4762a1bSJed Brown 57*c4762a1bSJed Brown /* 58*c4762a1bSJed Brown Application-defined context for problem specific data 59*c4762a1bSJed Brown */ 60*c4762a1bSJed Brown typedef struct { 61*c4762a1bSJed Brown PetscInt Nvglobal,Nvlocal; /* global and local number of vertices */ 62*c4762a1bSJed Brown PetscInt Neglobal,Nelocal; /* global and local number of vertices */ 63*c4762a1bSJed Brown PetscInt AdjM[MAX_VERT][50]; /* adjacency list of a vertex */ 64*c4762a1bSJed Brown PetscInt itot[MAX_VERT]; /* total number of neighbors for a vertex */ 65*c4762a1bSJed Brown PetscInt icv[MAX_ELEM][MAX_VERT_ELEM]; /* vertices belonging to an element */ 66*c4762a1bSJed Brown PetscInt v2p[MAX_VERT]; /* processor number for a vertex */ 67*c4762a1bSJed Brown PetscInt *locInd,*gloInd; /* local and global orderings for a node */ 68*c4762a1bSJed Brown Vec localX,localF; /* local solution (u) and f(u) vectors */ 69*c4762a1bSJed Brown PetscReal non_lin_param; /* nonlinear parameter for the PDE */ 70*c4762a1bSJed Brown PetscReal lin_param; /* linear parameter for the PDE */ 71*c4762a1bSJed Brown VecScatter scatter; /* scatter context for the local and 72*c4762a1bSJed Brown distributed vectors */ 73*c4762a1bSJed Brown } AppCtx; 74*c4762a1bSJed Brown 75*c4762a1bSJed Brown /* 76*c4762a1bSJed Brown User-defined routines 77*c4762a1bSJed Brown */ 78*c4762a1bSJed Brown PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*); 79*c4762a1bSJed Brown PetscErrorCode FormFunction(SNES,Vec,Vec,void*); 80*c4762a1bSJed Brown PetscErrorCode FormInitialGuess(AppCtx*,Vec); 81*c4762a1bSJed Brown 82*c4762a1bSJed Brown int main(int argc,char **argv) 83*c4762a1bSJed Brown { 84*c4762a1bSJed Brown SNES snes; /* SNES context */ 85*c4762a1bSJed Brown SNESType type = SNESNEWTONLS; /* default nonlinear solution method */ 86*c4762a1bSJed Brown Vec x,r; /* solution, residual vectors */ 87*c4762a1bSJed Brown Mat Jac; /* Jacobian matrix */ 88*c4762a1bSJed Brown AppCtx user; /* user-defined application context */ 89*c4762a1bSJed Brown AO ao; /* Application Ordering object */ 90*c4762a1bSJed Brown IS isglobal,islocal; /* global and local index sets */ 91*c4762a1bSJed Brown PetscMPIInt rank,size; /* rank of a process, number of processors */ 92*c4762a1bSJed Brown PetscInt rstart; /* starting index of PETSc ordering for a processor */ 93*c4762a1bSJed Brown PetscInt nfails; /* number of unsuccessful Newton steps */ 94*c4762a1bSJed Brown PetscInt bs = 1; /* block size for multicomponent systems */ 95*c4762a1bSJed Brown PetscInt nvertices; /* number of local plus ghost nodes of a processor */ 96*c4762a1bSJed Brown PetscInt *pordering; /* PETSc ordering */ 97*c4762a1bSJed Brown PetscInt *vertices; /* list of all vertices (incl. ghost ones) on a processor */ 98*c4762a1bSJed Brown PetscInt *verticesmask; 99*c4762a1bSJed Brown PetscInt *tmp; 100*c4762a1bSJed Brown PetscInt i,j,jstart,inode,nb,nbrs,Nvneighborstotal = 0; 101*c4762a1bSJed Brown PetscErrorCode ierr; 102*c4762a1bSJed Brown PetscInt its,N; 103*c4762a1bSJed Brown PetscScalar *xx; 104*c4762a1bSJed Brown char str[256],form[256],part_name[256]; 105*c4762a1bSJed Brown FILE *fptr,*fptr1; 106*c4762a1bSJed Brown ISLocalToGlobalMapping isl2g; 107*c4762a1bSJed Brown int dtmp; 108*c4762a1bSJed Brown #if defined(UNUSED_VARIABLES) 109*c4762a1bSJed Brown PetscDraw draw; /* drawing context */ 110*c4762a1bSJed Brown PetscScalar *ff,*gg; 111*c4762a1bSJed Brown PetscReal tiny = 1.0e-10,zero = 0.0,one = 1.0,big = 1.0e+10; 112*c4762a1bSJed Brown PetscInt *tmp1,*tmp2; 113*c4762a1bSJed Brown #endif 114*c4762a1bSJed Brown MatFDColoring matfdcoloring = 0; 115*c4762a1bSJed Brown PetscBool fd_jacobian_coloring = PETSC_FALSE; 116*c4762a1bSJed Brown 117*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 118*c4762a1bSJed Brown Initialize program 119*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 120*c4762a1bSJed Brown 121*c4762a1bSJed Brown ierr = PetscInitialize(&argc,&argv,"options.inf",help);if (ierr) return ierr; 122*c4762a1bSJed Brown ierr = MPI_Comm_rank(MPI_COMM_WORLD,&rank);CHKERRQ(ierr); 123*c4762a1bSJed Brown ierr = MPI_Comm_size(MPI_COMM_WORLD,&size);CHKERRQ(ierr); 124*c4762a1bSJed Brown 125*c4762a1bSJed Brown /* The current input file options.inf is for 2 proc run only */ 126*c4762a1bSJed Brown if (size != 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This example currently runs on 2 procs only."); 127*c4762a1bSJed Brown 128*c4762a1bSJed Brown /* 129*c4762a1bSJed Brown Initialize problem parameters 130*c4762a1bSJed Brown */ 131*c4762a1bSJed Brown user.Nvglobal = 16; /*Global # of vertices */ 132*c4762a1bSJed Brown user.Neglobal = 18; /*Global # of elements */ 133*c4762a1bSJed Brown 134*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-vert",&user.Nvglobal,NULL);CHKERRQ(ierr); 135*c4762a1bSJed Brown ierr = PetscOptionsGetInt(NULL,NULL,"-elem",&user.Neglobal,NULL);CHKERRQ(ierr); 136*c4762a1bSJed Brown 137*c4762a1bSJed Brown user.non_lin_param = 0.06; 138*c4762a1bSJed Brown 139*c4762a1bSJed Brown ierr = PetscOptionsGetReal(NULL,NULL,"-nl_par",&user.non_lin_param,NULL);CHKERRQ(ierr); 140*c4762a1bSJed Brown 141*c4762a1bSJed Brown user.lin_param = -1.0; 142*c4762a1bSJed Brown 143*c4762a1bSJed Brown ierr = PetscOptionsGetReal(NULL,NULL,"-lin_par",&user.lin_param,NULL);CHKERRQ(ierr); 144*c4762a1bSJed Brown 145*c4762a1bSJed Brown user.Nvlocal = 0; 146*c4762a1bSJed Brown user.Nelocal = 0; 147*c4762a1bSJed Brown 148*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 149*c4762a1bSJed Brown Read the mesh and partitioning information 150*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 151*c4762a1bSJed Brown 152*c4762a1bSJed Brown /* 153*c4762a1bSJed Brown Read the mesh and partitioning information from 'adj.in'. 154*c4762a1bSJed Brown The file format is as follows. 155*c4762a1bSJed Brown For each line the first entry is the processor rank where the 156*c4762a1bSJed Brown current node belongs. The second entry is the number of 157*c4762a1bSJed Brown neighbors of a node. The rest of the line is the adjacency 158*c4762a1bSJed Brown list of a node. Currently this file is set up to work on two 159*c4762a1bSJed Brown processors. 160*c4762a1bSJed Brown 161*c4762a1bSJed Brown This is not a very good example of reading input. In the future, 162*c4762a1bSJed Brown we will put an example that shows the style that should be 163*c4762a1bSJed Brown used in a real application, where partitioning will be done 164*c4762a1bSJed Brown dynamically by calling partitioning routines (at present, we have 165*c4762a1bSJed Brown a ready interface to ParMeTiS). 166*c4762a1bSJed Brown */ 167*c4762a1bSJed Brown fptr = fopen("adj.in","r"); 168*c4762a1bSJed Brown if (!fptr) SETERRQ(PETSC_COMM_SELF,0,"Could not open adj.in"); 169*c4762a1bSJed Brown 170*c4762a1bSJed Brown /* 171*c4762a1bSJed Brown Each processor writes to the file output.<rank> where rank is the 172*c4762a1bSJed Brown processor's rank. 173*c4762a1bSJed Brown */ 174*c4762a1bSJed Brown sprintf(part_name,"output.%d",rank); 175*c4762a1bSJed Brown fptr1 = fopen(part_name,"w"); 176*c4762a1bSJed Brown if (!fptr1) SETERRQ(PETSC_COMM_SELF,0,"Could no open output file"); 177*c4762a1bSJed Brown ierr = PetscMalloc1(user.Nvglobal,&user.gloInd);CHKERRQ(ierr); 178*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Rank is %d\n",rank);CHKERRQ(ierr); 179*c4762a1bSJed Brown for (inode = 0; inode < user.Nvglobal; inode++) { 180*c4762a1bSJed Brown if (!fgets(str,256,fptr)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_READ,"fgets read failed"); 181*c4762a1bSJed Brown sscanf(str,"%d",&dtmp);user.v2p[inode] = dtmp; 182*c4762a1bSJed Brown if (user.v2p[inode] == rank) { 183*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Node %D belongs to processor %D\n",inode,user.v2p[inode]);CHKERRQ(ierr); 184*c4762a1bSJed Brown 185*c4762a1bSJed Brown user.gloInd[user.Nvlocal] = inode; 186*c4762a1bSJed Brown sscanf(str,"%*d %d",&dtmp); 187*c4762a1bSJed Brown nbrs = dtmp; 188*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Number of neighbors for the vertex %D is %D\n",inode,nbrs);CHKERRQ(ierr); 189*c4762a1bSJed Brown 190*c4762a1bSJed Brown user.itot[user.Nvlocal] = nbrs; 191*c4762a1bSJed Brown Nvneighborstotal += nbrs; 192*c4762a1bSJed Brown for (i = 0; i < user.itot[user.Nvlocal]; i++) { 193*c4762a1bSJed Brown form[0]='\0'; 194*c4762a1bSJed Brown for (j=0; j < i+2; j++) { 195*c4762a1bSJed Brown ierr = PetscStrlcat(form,"%*d ",sizeof(form));CHKERRQ(ierr); 196*c4762a1bSJed Brown } 197*c4762a1bSJed Brown ierr = PetscStrlcat(form,"%d",sizeof(form));CHKERRQ(ierr); 198*c4762a1bSJed Brown 199*c4762a1bSJed Brown sscanf(str,form,&dtmp); 200*c4762a1bSJed Brown user.AdjM[user.Nvlocal][i] = dtmp; 201*c4762a1bSJed Brown 202*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"%D ",user.AdjM[user.Nvlocal][i]);CHKERRQ(ierr); 203*c4762a1bSJed Brown } 204*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 205*c4762a1bSJed Brown user.Nvlocal++; 206*c4762a1bSJed Brown } 207*c4762a1bSJed Brown } 208*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Total # of Local Vertices is %D \n",user.Nvlocal);CHKERRQ(ierr); 209*c4762a1bSJed Brown 210*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 211*c4762a1bSJed Brown Create different orderings 212*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 213*c4762a1bSJed Brown 214*c4762a1bSJed Brown /* 215*c4762a1bSJed Brown Create the local ordering list for vertices. First a list using the PETSc global 216*c4762a1bSJed Brown ordering is created. Then we use the AO object to get the PETSc-to-application and 217*c4762a1bSJed Brown application-to-PETSc mappings. Each vertex also gets a local index (stored in the 218*c4762a1bSJed Brown locInd array). 219*c4762a1bSJed Brown */ 220*c4762a1bSJed Brown ierr = MPI_Scan(&user.Nvlocal,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr); 221*c4762a1bSJed Brown rstart -= user.Nvlocal; 222*c4762a1bSJed Brown ierr = PetscMalloc1(user.Nvlocal,&pordering);CHKERRQ(ierr); 223*c4762a1bSJed Brown 224*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) pordering[i] = rstart + i; 225*c4762a1bSJed Brown 226*c4762a1bSJed Brown /* 227*c4762a1bSJed Brown Create the AO object 228*c4762a1bSJed Brown */ 229*c4762a1bSJed Brown ierr = AOCreateBasic(MPI_COMM_WORLD,user.Nvlocal,user.gloInd,pordering,&ao);CHKERRQ(ierr); 230*c4762a1bSJed Brown ierr = PetscFree(pordering);CHKERRQ(ierr); 231*c4762a1bSJed Brown 232*c4762a1bSJed Brown /* 233*c4762a1bSJed Brown Keep the global indices for later use 234*c4762a1bSJed Brown */ 235*c4762a1bSJed Brown ierr = PetscMalloc1(user.Nvlocal,&user.locInd);CHKERRQ(ierr); 236*c4762a1bSJed Brown ierr = PetscMalloc1(Nvneighborstotal,&tmp);CHKERRQ(ierr); 237*c4762a1bSJed Brown 238*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 239*c4762a1bSJed Brown Demonstrate the use of AO functionality 240*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 241*c4762a1bSJed Brown 242*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Before AOApplicationToPetsc, local indices are : \n");CHKERRQ(ierr); 243*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) { 244*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1," %D ",user.gloInd[i]);CHKERRQ(ierr); 245*c4762a1bSJed Brown 246*c4762a1bSJed Brown user.locInd[i] = user.gloInd[i]; 247*c4762a1bSJed Brown } 248*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 249*c4762a1bSJed Brown jstart = 0; 250*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) { 251*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Neghbors of local vertex %D are : ",user.gloInd[i]);CHKERRQ(ierr); 252*c4762a1bSJed Brown for (j=0; j < user.itot[i]; j++) { 253*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"%D ",user.AdjM[i][j]);CHKERRQ(ierr); 254*c4762a1bSJed Brown 255*c4762a1bSJed Brown tmp[j + jstart] = user.AdjM[i][j]; 256*c4762a1bSJed Brown } 257*c4762a1bSJed Brown jstart += user.itot[i]; 258*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 259*c4762a1bSJed Brown } 260*c4762a1bSJed Brown 261*c4762a1bSJed Brown /* 262*c4762a1bSJed Brown Now map the vlocal and neighbor lists to the PETSc ordering 263*c4762a1bSJed Brown */ 264*c4762a1bSJed Brown ierr = AOApplicationToPetsc(ao,user.Nvlocal,user.locInd);CHKERRQ(ierr); 265*c4762a1bSJed Brown ierr = AOApplicationToPetsc(ao,Nvneighborstotal,tmp);CHKERRQ(ierr); 266*c4762a1bSJed Brown ierr = AODestroy(&ao);CHKERRQ(ierr); 267*c4762a1bSJed Brown 268*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"After AOApplicationToPetsc, local indices are : \n");CHKERRQ(ierr); 269*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) { 270*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1," %D ",user.locInd[i]);CHKERRQ(ierr); 271*c4762a1bSJed Brown } 272*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 273*c4762a1bSJed Brown 274*c4762a1bSJed Brown jstart = 0; 275*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) { 276*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Neghbors of local vertex %D are : ",user.locInd[i]);CHKERRQ(ierr); 277*c4762a1bSJed Brown for (j=0; j < user.itot[i]; j++) { 278*c4762a1bSJed Brown user.AdjM[i][j] = tmp[j+jstart]; 279*c4762a1bSJed Brown 280*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"%D ",user.AdjM[i][j]);CHKERRQ(ierr); 281*c4762a1bSJed Brown } 282*c4762a1bSJed Brown jstart += user.itot[i]; 283*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 284*c4762a1bSJed Brown } 285*c4762a1bSJed Brown 286*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 287*c4762a1bSJed Brown Extract the ghost vertex information for each processor 288*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 289*c4762a1bSJed Brown /* 290*c4762a1bSJed Brown Next, we need to generate a list of vertices required for this processor 291*c4762a1bSJed Brown and a local numbering scheme for all vertices required on this processor. 292*c4762a1bSJed Brown vertices - integer array of all vertices needed on this processor in PETSc 293*c4762a1bSJed Brown global numbering; this list consists of first the "locally owned" 294*c4762a1bSJed Brown vertices followed by the ghost vertices. 295*c4762a1bSJed Brown verticesmask - integer array that for each global vertex lists its local 296*c4762a1bSJed Brown vertex number (in vertices) + 1. If the global vertex is not 297*c4762a1bSJed Brown represented on this processor, then the corresponding 298*c4762a1bSJed Brown entry in verticesmask is zero 299*c4762a1bSJed Brown 300*c4762a1bSJed Brown Note: vertices and verticesmask are both Nvglobal in length; this may 301*c4762a1bSJed Brown sound terribly non-scalable, but in fact is not so bad for a reasonable 302*c4762a1bSJed Brown number of processors. Importantly, it allows us to use NO SEARCHING 303*c4762a1bSJed Brown in setting up the data structures. 304*c4762a1bSJed Brown */ 305*c4762a1bSJed Brown ierr = PetscMalloc1(user.Nvglobal,&vertices);CHKERRQ(ierr); 306*c4762a1bSJed Brown ierr = PetscCalloc1(user.Nvglobal,&verticesmask);CHKERRQ(ierr); 307*c4762a1bSJed Brown nvertices = 0; 308*c4762a1bSJed Brown 309*c4762a1bSJed Brown /* 310*c4762a1bSJed Brown First load "owned vertices" into list 311*c4762a1bSJed Brown */ 312*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) { 313*c4762a1bSJed Brown vertices[nvertices++] = user.locInd[i]; 314*c4762a1bSJed Brown verticesmask[user.locInd[i]] = nvertices; 315*c4762a1bSJed Brown } 316*c4762a1bSJed Brown 317*c4762a1bSJed Brown /* 318*c4762a1bSJed Brown Now load ghost vertices into list 319*c4762a1bSJed Brown */ 320*c4762a1bSJed Brown for (i=0; i < user.Nvlocal; i++) { 321*c4762a1bSJed Brown for (j=0; j < user.itot[i]; j++) { 322*c4762a1bSJed Brown nb = user.AdjM[i][j]; 323*c4762a1bSJed Brown if (!verticesmask[nb]) { 324*c4762a1bSJed Brown vertices[nvertices++] = nb; 325*c4762a1bSJed Brown verticesmask[nb] = nvertices; 326*c4762a1bSJed Brown } 327*c4762a1bSJed Brown } 328*c4762a1bSJed Brown } 329*c4762a1bSJed Brown 330*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 331*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"The array vertices is :\n");CHKERRQ(ierr); 332*c4762a1bSJed Brown for (i=0; i < nvertices; i++) { 333*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"%D ",vertices[i]);CHKERRQ(ierr); 334*c4762a1bSJed Brown } 335*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 336*c4762a1bSJed Brown 337*c4762a1bSJed Brown /* 338*c4762a1bSJed Brown Map the vertices listed in the neighbors to the local numbering from 339*c4762a1bSJed Brown the global ordering that they contained initially. 340*c4762a1bSJed Brown */ 341*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 342*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"After mapping neighbors in the local contiguous ordering\n");CHKERRQ(ierr); 343*c4762a1bSJed Brown for (i=0; i<user.Nvlocal; i++) { 344*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Neghbors of local vertex %D are :\n",i);CHKERRQ(ierr); 345*c4762a1bSJed Brown for (j = 0; j < user.itot[i]; j++) { 346*c4762a1bSJed Brown nb = user.AdjM[i][j]; 347*c4762a1bSJed Brown user.AdjM[i][j] = verticesmask[nb] - 1; 348*c4762a1bSJed Brown 349*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"%D ",user.AdjM[i][j]);CHKERRQ(ierr); 350*c4762a1bSJed Brown } 351*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"\n");CHKERRQ(ierr); 352*c4762a1bSJed Brown } 353*c4762a1bSJed Brown 354*c4762a1bSJed Brown N = user.Nvglobal; 355*c4762a1bSJed Brown 356*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 357*c4762a1bSJed Brown Create vector and matrix data structures 358*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 359*c4762a1bSJed Brown 360*c4762a1bSJed Brown /* 361*c4762a1bSJed Brown Create vector data structures 362*c4762a1bSJed Brown */ 363*c4762a1bSJed Brown ierr = VecCreate(MPI_COMM_WORLD,&x);CHKERRQ(ierr); 364*c4762a1bSJed Brown ierr = VecSetSizes(x,user.Nvlocal,N);CHKERRQ(ierr); 365*c4762a1bSJed Brown ierr = VecSetFromOptions(x);CHKERRQ(ierr); 366*c4762a1bSJed Brown ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 367*c4762a1bSJed Brown ierr = VecCreateSeq(MPI_COMM_SELF,bs*nvertices,&user.localX);CHKERRQ(ierr); 368*c4762a1bSJed Brown ierr = VecDuplicate(user.localX,&user.localF);CHKERRQ(ierr); 369*c4762a1bSJed Brown 370*c4762a1bSJed Brown /* 371*c4762a1bSJed Brown Create the scatter between the global representation and the 372*c4762a1bSJed Brown local representation 373*c4762a1bSJed Brown */ 374*c4762a1bSJed Brown ierr = ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);CHKERRQ(ierr); 375*c4762a1bSJed Brown ierr = ISCreateBlock(MPI_COMM_SELF,bs,nvertices,vertices,PETSC_COPY_VALUES,&isglobal);CHKERRQ(ierr); 376*c4762a1bSJed Brown ierr = VecScatterCreate(x,isglobal,user.localX,islocal,&user.scatter);CHKERRQ(ierr); 377*c4762a1bSJed Brown ierr = ISDestroy(&isglobal);CHKERRQ(ierr); 378*c4762a1bSJed Brown ierr = ISDestroy(&islocal);CHKERRQ(ierr); 379*c4762a1bSJed Brown 380*c4762a1bSJed Brown /* 381*c4762a1bSJed Brown Create matrix data structure; Just to keep the example simple, we have not done any 382*c4762a1bSJed Brown preallocation of memory for the matrix. In real application code with big matrices, 383*c4762a1bSJed Brown preallocation should always be done to expedite the matrix creation. 384*c4762a1bSJed Brown */ 385*c4762a1bSJed Brown ierr = MatCreate(MPI_COMM_WORLD,&Jac);CHKERRQ(ierr); 386*c4762a1bSJed Brown ierr = MatSetSizes(Jac,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr); 387*c4762a1bSJed Brown ierr = MatSetFromOptions(Jac);CHKERRQ(ierr); 388*c4762a1bSJed Brown ierr = MatSetUp(Jac);CHKERRQ(ierr); 389*c4762a1bSJed Brown 390*c4762a1bSJed Brown /* 391*c4762a1bSJed Brown The following routine allows us to set the matrix values in local ordering 392*c4762a1bSJed Brown */ 393*c4762a1bSJed Brown ierr = ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,vertices,PETSC_COPY_VALUES,&isl2g);CHKERRQ(ierr); 394*c4762a1bSJed Brown ierr = MatSetLocalToGlobalMapping(Jac,isl2g,isl2g);CHKERRQ(ierr); 395*c4762a1bSJed Brown 396*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 397*c4762a1bSJed Brown Create nonlinear solver context 398*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 399*c4762a1bSJed Brown 400*c4762a1bSJed Brown ierr = SNESCreate(MPI_COMM_WORLD,&snes);CHKERRQ(ierr); 401*c4762a1bSJed Brown ierr = SNESSetType(snes,type);CHKERRQ(ierr); 402*c4762a1bSJed Brown 403*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 404*c4762a1bSJed Brown Set routines for function and Jacobian evaluation 405*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 406*c4762a1bSJed Brown ierr = SNESSetFunction(snes,r,FormFunction,(void*)&user);CHKERRQ(ierr); 407*c4762a1bSJed Brown 408*c4762a1bSJed Brown ierr = PetscOptionsGetBool(NULL,NULL,"-fd_jacobian_coloring",&fd_jacobian_coloring,0);CHKERRQ(ierr); 409*c4762a1bSJed Brown if (!fd_jacobian_coloring) { 410*c4762a1bSJed Brown ierr = SNESSetJacobian(snes,Jac,Jac,FormJacobian,(void*)&user);CHKERRQ(ierr); 411*c4762a1bSJed Brown } else { /* Use matfdcoloring */ 412*c4762a1bSJed Brown ISColoring iscoloring; 413*c4762a1bSJed Brown MatColoring mc; 414*c4762a1bSJed Brown 415*c4762a1bSJed Brown /* Get the data structure of Jac */ 416*c4762a1bSJed Brown ierr = FormJacobian(snes,x,Jac,Jac,&user);CHKERRQ(ierr); 417*c4762a1bSJed Brown /* Create coloring context */ 418*c4762a1bSJed Brown ierr = MatColoringCreate(Jac,&mc);CHKERRQ(ierr); 419*c4762a1bSJed Brown ierr = MatColoringSetType(mc,MATCOLORINGSL);CHKERRQ(ierr); 420*c4762a1bSJed Brown ierr = MatColoringSetFromOptions(mc);CHKERRQ(ierr); 421*c4762a1bSJed Brown ierr = MatColoringApply(mc,&iscoloring);CHKERRQ(ierr); 422*c4762a1bSJed Brown ierr = MatColoringDestroy(&mc);CHKERRQ(ierr); 423*c4762a1bSJed Brown ierr = MatFDColoringCreate(Jac,iscoloring,&matfdcoloring);CHKERRQ(ierr); 424*c4762a1bSJed Brown ierr = MatFDColoringSetFunction(matfdcoloring,(PetscErrorCode (*)(void))FormFunction,&user);CHKERRQ(ierr); 425*c4762a1bSJed Brown ierr = MatFDColoringSetFromOptions(matfdcoloring);CHKERRQ(ierr); 426*c4762a1bSJed Brown ierr = MatFDColoringSetUp(Jac,iscoloring,matfdcoloring);CHKERRQ(ierr); 427*c4762a1bSJed Brown /* ierr = MatFDColoringView(matfdcoloring,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */ 428*c4762a1bSJed Brown ierr = SNESSetJacobian(snes,Jac,Jac,SNESComputeJacobianDefaultColor,matfdcoloring);CHKERRQ(ierr); 429*c4762a1bSJed Brown ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr); 430*c4762a1bSJed Brown } 431*c4762a1bSJed Brown 432*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 433*c4762a1bSJed Brown Customize nonlinear solver; set runtime options 434*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 435*c4762a1bSJed Brown 436*c4762a1bSJed Brown ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 437*c4762a1bSJed Brown 438*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 439*c4762a1bSJed Brown Evaluate initial guess; then solve nonlinear system 440*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 441*c4762a1bSJed Brown 442*c4762a1bSJed Brown /* 443*c4762a1bSJed Brown Note: The user should initialize the vector, x, with the initial guess 444*c4762a1bSJed Brown for the nonlinear solver prior to calling SNESSolve(). In particular, 445*c4762a1bSJed Brown to employ an initial guess of zero, the user should explicitly set 446*c4762a1bSJed Brown this vector to zero by calling VecSet(). 447*c4762a1bSJed Brown */ 448*c4762a1bSJed Brown ierr = FormInitialGuess(&user,x);CHKERRQ(ierr); 449*c4762a1bSJed Brown 450*c4762a1bSJed Brown /* 451*c4762a1bSJed Brown Print the initial guess 452*c4762a1bSJed Brown */ 453*c4762a1bSJed Brown ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 454*c4762a1bSJed Brown for (inode = 0; inode < user.Nvlocal; inode++) { 455*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Initial Solution at node %D is %f \n",inode,xx[inode]);CHKERRQ(ierr); 456*c4762a1bSJed Brown } 457*c4762a1bSJed Brown ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 458*c4762a1bSJed Brown 459*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 460*c4762a1bSJed Brown Now solve the nonlinear system 461*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 462*c4762a1bSJed Brown 463*c4762a1bSJed Brown ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 464*c4762a1bSJed Brown ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 465*c4762a1bSJed Brown ierr = SNESGetNonlinearStepFailures(snes,&nfails);CHKERRQ(ierr); 466*c4762a1bSJed Brown 467*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 468*c4762a1bSJed Brown Print the output : solution vector and other information 469*c4762a1bSJed Brown Each processor writes to the file output.<rank> where rank is the 470*c4762a1bSJed Brown processor's rank. 471*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 472*c4762a1bSJed Brown 473*c4762a1bSJed Brown ierr = VecGetArray(x,&xx);CHKERRQ(ierr); 474*c4762a1bSJed Brown for (inode = 0; inode < user.Nvlocal; inode++) { 475*c4762a1bSJed Brown ierr = PetscFPrintf(PETSC_COMM_SELF,fptr1,"Solution at node %D is %f \n",inode,xx[inode]);CHKERRQ(ierr); 476*c4762a1bSJed Brown } 477*c4762a1bSJed Brown ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr); 478*c4762a1bSJed Brown fclose(fptr1); 479*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD,"number of SNES iterations = %D, ",its);CHKERRQ(ierr); 480*c4762a1bSJed Brown ierr = PetscPrintf(MPI_COMM_WORLD,"number of unsuccessful steps = %D\n",nfails);CHKERRQ(ierr); 481*c4762a1bSJed Brown 482*c4762a1bSJed Brown /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 483*c4762a1bSJed Brown Free work space. All PETSc objects should be destroyed when they 484*c4762a1bSJed Brown are no longer needed. 485*c4762a1bSJed Brown - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 486*c4762a1bSJed Brown ierr = PetscFree(user.gloInd);CHKERRQ(ierr); 487*c4762a1bSJed Brown ierr = PetscFree(user.locInd);CHKERRQ(ierr); 488*c4762a1bSJed Brown ierr = PetscFree(vertices);CHKERRQ(ierr); 489*c4762a1bSJed Brown ierr = PetscFree(verticesmask);CHKERRQ(ierr); 490*c4762a1bSJed Brown ierr = PetscFree(tmp);CHKERRQ(ierr); 491*c4762a1bSJed Brown ierr = VecScatterDestroy(&user.scatter);CHKERRQ(ierr); 492*c4762a1bSJed Brown ierr = ISLocalToGlobalMappingDestroy(&isl2g);CHKERRQ(ierr); 493*c4762a1bSJed Brown ierr = VecDestroy(&x);CHKERRQ(ierr); 494*c4762a1bSJed Brown ierr = VecDestroy(&r);CHKERRQ(ierr); 495*c4762a1bSJed Brown ierr = VecDestroy(&user.localX);CHKERRQ(ierr); 496*c4762a1bSJed Brown ierr = VecDestroy(&user.localF);CHKERRQ(ierr); 497*c4762a1bSJed Brown ierr = MatDestroy(&Jac);CHKERRQ(ierr); ierr = SNESDestroy(&snes);CHKERRQ(ierr); 498*c4762a1bSJed Brown /*ierr = PetscDrawDestroy(draw);CHKERRQ(ierr);*/ 499*c4762a1bSJed Brown if (fd_jacobian_coloring) { 500*c4762a1bSJed Brown ierr = MatFDColoringDestroy(&matfdcoloring);CHKERRQ(ierr); 501*c4762a1bSJed Brown } 502*c4762a1bSJed Brown ierr = PetscFinalize(); 503*c4762a1bSJed Brown return ierr; 504*c4762a1bSJed Brown } 505*c4762a1bSJed Brown /* -------------------- Form initial approximation ----------------- */ 506*c4762a1bSJed Brown 507*c4762a1bSJed Brown /* 508*c4762a1bSJed Brown FormInitialGuess - Forms initial approximation. 509*c4762a1bSJed Brown 510*c4762a1bSJed Brown Input Parameters: 511*c4762a1bSJed Brown user - user-defined application context 512*c4762a1bSJed Brown X - vector 513*c4762a1bSJed Brown 514*c4762a1bSJed Brown Output Parameter: 515*c4762a1bSJed Brown X - vector 516*c4762a1bSJed Brown */ 517*c4762a1bSJed Brown PetscErrorCode FormInitialGuess(AppCtx *user,Vec X) 518*c4762a1bSJed Brown { 519*c4762a1bSJed Brown PetscInt i,Nvlocal,ierr; 520*c4762a1bSJed Brown PetscInt *gloInd; 521*c4762a1bSJed Brown PetscScalar *x; 522*c4762a1bSJed Brown #if defined(UNUSED_VARIABLES) 523*c4762a1bSJed Brown PetscReal temp1,temp,hx,hy,hxdhy,hydhx,sc; 524*c4762a1bSJed Brown PetscInt Neglobal,Nvglobal,j,row; 525*c4762a1bSJed Brown PetscReal alpha,lambda; 526*c4762a1bSJed Brown 527*c4762a1bSJed Brown Nvglobal = user->Nvglobal; 528*c4762a1bSJed Brown Neglobal = user->Neglobal; 529*c4762a1bSJed Brown lambda = user->non_lin_param; 530*c4762a1bSJed Brown alpha = user->lin_param; 531*c4762a1bSJed Brown #endif 532*c4762a1bSJed Brown 533*c4762a1bSJed Brown Nvlocal = user->Nvlocal; 534*c4762a1bSJed Brown gloInd = user->gloInd; 535*c4762a1bSJed Brown 536*c4762a1bSJed Brown /* 537*c4762a1bSJed Brown Get a pointer to vector data. 538*c4762a1bSJed Brown - For default PETSc vectors, VecGetArray() returns a pointer to 539*c4762a1bSJed Brown the data array. Otherwise, the routine is implementation dependent. 540*c4762a1bSJed Brown - You MUST call VecRestoreArray() when you no longer need access to 541*c4762a1bSJed Brown the array. 542*c4762a1bSJed Brown */ 543*c4762a1bSJed Brown ierr = VecGetArray(X,&x);CHKERRQ(ierr); 544*c4762a1bSJed Brown 545*c4762a1bSJed Brown /* 546*c4762a1bSJed Brown Compute initial guess over the locally owned part of the grid 547*c4762a1bSJed Brown */ 548*c4762a1bSJed Brown for (i=0; i < Nvlocal; i++) x[i] = (PetscReal)gloInd[i]; 549*c4762a1bSJed Brown 550*c4762a1bSJed Brown /* 551*c4762a1bSJed Brown Restore vector 552*c4762a1bSJed Brown */ 553*c4762a1bSJed Brown ierr = VecRestoreArray(X,&x);CHKERRQ(ierr); 554*c4762a1bSJed Brown return 0; 555*c4762a1bSJed Brown } 556*c4762a1bSJed Brown /* -------------------- Evaluate Function F(x) --------------------- */ 557*c4762a1bSJed Brown /* 558*c4762a1bSJed Brown FormFunction - Evaluates nonlinear function, F(x). 559*c4762a1bSJed Brown 560*c4762a1bSJed Brown Input Parameters: 561*c4762a1bSJed Brown . snes - the SNES context 562*c4762a1bSJed Brown . X - input vector 563*c4762a1bSJed Brown . ptr - optional user-defined context, as set by SNESSetFunction() 564*c4762a1bSJed Brown 565*c4762a1bSJed Brown Output Parameter: 566*c4762a1bSJed Brown . F - function vector 567*c4762a1bSJed Brown */ 568*c4762a1bSJed Brown PetscErrorCode FormFunction(SNES snes,Vec X,Vec F,void *ptr) 569*c4762a1bSJed Brown { 570*c4762a1bSJed Brown AppCtx *user = (AppCtx*)ptr; 571*c4762a1bSJed Brown PetscErrorCode ierr; 572*c4762a1bSJed Brown PetscInt i,j,Nvlocal; 573*c4762a1bSJed Brown PetscReal alpha,lambda; 574*c4762a1bSJed Brown PetscScalar *x,*f; 575*c4762a1bSJed Brown VecScatter scatter; 576*c4762a1bSJed Brown Vec localX = user->localX; 577*c4762a1bSJed Brown #if defined(UNUSED_VARIABLES) 578*c4762a1bSJed Brown PetscScalar ut,ub,ul,ur,u,*g,sc,uyy,uxx; 579*c4762a1bSJed Brown PetscReal hx,hy,hxdhy,hydhx; 580*c4762a1bSJed Brown PetscReal two = 2.0,one = 1.0; 581*c4762a1bSJed Brown PetscInt Nvglobal,Neglobal,row; 582*c4762a1bSJed Brown PetscInt *gloInd; 583*c4762a1bSJed Brown 584*c4762a1bSJed Brown Nvglobal = user->Nvglobal; 585*c4762a1bSJed Brown Neglobal = user->Neglobal; 586*c4762a1bSJed Brown gloInd = user->gloInd; 587*c4762a1bSJed Brown #endif 588*c4762a1bSJed Brown 589*c4762a1bSJed Brown Nvlocal = user->Nvlocal; 590*c4762a1bSJed Brown lambda = user->non_lin_param; 591*c4762a1bSJed Brown alpha = user->lin_param; 592*c4762a1bSJed Brown scatter = user->scatter; 593*c4762a1bSJed Brown 594*c4762a1bSJed Brown /* 595*c4762a1bSJed Brown PDE : L(u) + lambda*u*u +alpha*u = 0 where L(u) is the approximate Laplacian as 596*c4762a1bSJed Brown described in the beginning of this code 597*c4762a1bSJed Brown 598*c4762a1bSJed Brown First scatter the distributed vector X into local vector localX (that includes 599*c4762a1bSJed Brown values for ghost nodes. If we wish,we can put some other work between 600*c4762a1bSJed Brown VecScatterBegin() and VecScatterEnd() to overlap the communication with 601*c4762a1bSJed Brown computation. 602*c4762a1bSJed Brown */ 603*c4762a1bSJed Brown ierr = VecScatterBegin(scatter,X,localX,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 604*c4762a1bSJed Brown ierr = VecScatterEnd(scatter,X,localX,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 605*c4762a1bSJed Brown 606*c4762a1bSJed Brown /* 607*c4762a1bSJed Brown Get pointers to vector data 608*c4762a1bSJed Brown */ 609*c4762a1bSJed Brown ierr = VecGetArray(localX,&x);CHKERRQ(ierr); 610*c4762a1bSJed Brown ierr = VecGetArray(F,&f);CHKERRQ(ierr); 611*c4762a1bSJed Brown 612*c4762a1bSJed Brown /* 613*c4762a1bSJed Brown Now compute the f(x). As mentioned earlier, the computed Laplacian is just an 614*c4762a1bSJed Brown approximate one chosen for illustrative purpose only. Another point to notice 615*c4762a1bSJed Brown is that this is a local (completly parallel) calculation. In practical application 616*c4762a1bSJed Brown codes, function calculation time is a dominat portion of the overall execution time. 617*c4762a1bSJed Brown */ 618*c4762a1bSJed Brown for (i=0; i < Nvlocal; i++) { 619*c4762a1bSJed Brown f[i] = (user->itot[i] - alpha)*x[i] - lambda*x[i]*x[i]; 620*c4762a1bSJed Brown for (j = 0; j < user->itot[i]; j++) f[i] -= x[user->AdjM[i][j]]; 621*c4762a1bSJed Brown } 622*c4762a1bSJed Brown 623*c4762a1bSJed Brown /* 624*c4762a1bSJed Brown Restore vectors 625*c4762a1bSJed Brown */ 626*c4762a1bSJed Brown ierr = VecRestoreArray(localX,&x);CHKERRQ(ierr); 627*c4762a1bSJed Brown ierr = VecRestoreArray(F,&f);CHKERRQ(ierr); 628*c4762a1bSJed Brown /*ierr = VecView(F,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);*/ 629*c4762a1bSJed Brown 630*c4762a1bSJed Brown return 0; 631*c4762a1bSJed Brown } 632*c4762a1bSJed Brown 633*c4762a1bSJed Brown /* -------------------- Evaluate Jacobian F'(x) -------------------- */ 634*c4762a1bSJed Brown /* 635*c4762a1bSJed Brown FormJacobian - Evaluates Jacobian matrix. 636*c4762a1bSJed Brown 637*c4762a1bSJed Brown Input Parameters: 638*c4762a1bSJed Brown . snes - the SNES context 639*c4762a1bSJed Brown . X - input vector 640*c4762a1bSJed Brown . ptr - optional user-defined context, as set by SNESSetJacobian() 641*c4762a1bSJed Brown 642*c4762a1bSJed Brown Output Parameters: 643*c4762a1bSJed Brown . A - Jacobian matrix 644*c4762a1bSJed Brown . B - optionally different preconditioning matrix 645*c4762a1bSJed Brown . flag - flag indicating matrix structure 646*c4762a1bSJed Brown 647*c4762a1bSJed Brown */ 648*c4762a1bSJed Brown PetscErrorCode FormJacobian(SNES snes,Vec X,Mat J,Mat jac,void *ptr) 649*c4762a1bSJed Brown { 650*c4762a1bSJed Brown AppCtx *user = (AppCtx*)ptr; 651*c4762a1bSJed Brown PetscInt i,j,Nvlocal,col[50],ierr; 652*c4762a1bSJed Brown PetscScalar alpha,lambda,value[50]; 653*c4762a1bSJed Brown Vec localX = user->localX; 654*c4762a1bSJed Brown VecScatter scatter; 655*c4762a1bSJed Brown PetscScalar *x; 656*c4762a1bSJed Brown #if defined(UNUSED_VARIABLES) 657*c4762a1bSJed Brown PetscScalar two = 2.0,one = 1.0; 658*c4762a1bSJed Brown PetscInt row,Nvglobal,Neglobal; 659*c4762a1bSJed Brown PetscInt *gloInd; 660*c4762a1bSJed Brown 661*c4762a1bSJed Brown Nvglobal = user->Nvglobal; 662*c4762a1bSJed Brown Neglobal = user->Neglobal; 663*c4762a1bSJed Brown gloInd = user->gloInd; 664*c4762a1bSJed Brown #endif 665*c4762a1bSJed Brown 666*c4762a1bSJed Brown /*printf("Entering into FormJacobian \n");*/ 667*c4762a1bSJed Brown Nvlocal = user->Nvlocal; 668*c4762a1bSJed Brown lambda = user->non_lin_param; 669*c4762a1bSJed Brown alpha = user->lin_param; 670*c4762a1bSJed Brown scatter = user->scatter; 671*c4762a1bSJed Brown 672*c4762a1bSJed Brown /* 673*c4762a1bSJed Brown PDE : L(u) + lambda*u*u +alpha*u = 0 where L(u) is the approximate Laplacian as 674*c4762a1bSJed Brown described in the beginning of this code 675*c4762a1bSJed Brown 676*c4762a1bSJed Brown First scatter the distributed vector X into local vector localX (that includes 677*c4762a1bSJed Brown values for ghost nodes. If we wish, we can put some other work between 678*c4762a1bSJed Brown VecScatterBegin() and VecScatterEnd() to overlap the communication with 679*c4762a1bSJed Brown computation. 680*c4762a1bSJed Brown */ 681*c4762a1bSJed Brown ierr = VecScatterBegin(scatter,X,localX,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 682*c4762a1bSJed Brown ierr = VecScatterEnd(scatter,X,localX,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 683*c4762a1bSJed Brown 684*c4762a1bSJed Brown /* 685*c4762a1bSJed Brown Get pointer to vector data 686*c4762a1bSJed Brown */ 687*c4762a1bSJed Brown ierr = VecGetArray(localX,&x);CHKERRQ(ierr); 688*c4762a1bSJed Brown 689*c4762a1bSJed Brown for (i=0; i < Nvlocal; i++) { 690*c4762a1bSJed Brown col[0] = i; 691*c4762a1bSJed Brown value[0] = user->itot[i] - 2.0*lambda*x[i] - alpha; 692*c4762a1bSJed Brown for (j = 0; j < user->itot[i]; j++) { 693*c4762a1bSJed Brown col[j+1] = user->AdjM[i][j]; 694*c4762a1bSJed Brown value[j+1] = -1.0; 695*c4762a1bSJed Brown } 696*c4762a1bSJed Brown 697*c4762a1bSJed Brown /* 698*c4762a1bSJed Brown Set the matrix values in the local ordering. Note that in order to use this 699*c4762a1bSJed Brown feature we must call the routine MatSetLocalToGlobalMapping() after the 700*c4762a1bSJed Brown matrix has been created. 701*c4762a1bSJed Brown */ 702*c4762a1bSJed Brown ierr = MatSetValuesLocal(jac,1,&i,1+user->itot[i],col,value,INSERT_VALUES);CHKERRQ(ierr); 703*c4762a1bSJed Brown } 704*c4762a1bSJed Brown 705*c4762a1bSJed Brown /* 706*c4762a1bSJed Brown Assemble matrix, using the 2-step process: 707*c4762a1bSJed Brown MatAssemblyBegin(), MatAssemblyEnd(). 708*c4762a1bSJed Brown Between these two calls, the pointer to vector data has been restored to 709*c4762a1bSJed Brown demonstrate the use of overlapping communicationn with computation. 710*c4762a1bSJed Brown */ 711*c4762a1bSJed Brown ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 712*c4762a1bSJed Brown ierr = VecRestoreArray(localX,&x);CHKERRQ(ierr); 713*c4762a1bSJed Brown ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 714*c4762a1bSJed Brown 715*c4762a1bSJed Brown /* 716*c4762a1bSJed Brown Tell the matrix we will never add a new nonzero location to the 717*c4762a1bSJed Brown matrix. If we do, it will generate an error. 718*c4762a1bSJed Brown */ 719*c4762a1bSJed Brown ierr = MatSetOption(jac,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 720*c4762a1bSJed Brown /* MatView(jac,PETSC_VIEWER_STDOUT_SELF); */ 721*c4762a1bSJed Brown return 0; 722*c4762a1bSJed Brown } 723*c4762a1bSJed Brown 724*c4762a1bSJed Brown 725*c4762a1bSJed Brown 726*c4762a1bSJed Brown /*TEST 727*c4762a1bSJed Brown 728*c4762a1bSJed Brown build: 729*c4762a1bSJed Brown requires: !complex 730*c4762a1bSJed Brown 731*c4762a1bSJed Brown test: 732*c4762a1bSJed Brown nsize: 2 733*c4762a1bSJed Brown args: -snes_monitor_short 734*c4762a1bSJed Brown localrunfiles: options.inf adj.in 735*c4762a1bSJed Brown 736*c4762a1bSJed Brown test: 737*c4762a1bSJed Brown suffix: 2 738*c4762a1bSJed Brown nsize: 2 739*c4762a1bSJed Brown args: -snes_monitor_short -fd_jacobian_coloring 740*c4762a1bSJed Brown localrunfiles: options.inf adj.in 741*c4762a1bSJed Brown 742*c4762a1bSJed Brown TEST*/ 743