1d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h> 2c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 30d1c53f1SHong Zhang #include <../src/mat/impls/baij/mpi/mpibaij.h> 4af0996ceSBarry Smith #include <petsc/private/isimpl.h> 50d1c53f1SHong Zhang 6d1e9a80fSBarry Smith PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx) 70d1c53f1SHong Zhang { 80d1c53f1SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 90d1c53f1SHong Zhang PetscInt k,cstart,cend,l,row,col,nz,spidx,i,j; 105edff71fSBarry Smith PetscScalar dx=0.0,*w3_array,*dy_i,*dy=coloring->dy; 110d1c53f1SHong Zhang PetscScalar *vscale_array; 125edff71fSBarry Smith const PetscScalar *xx; 130d1c53f1SHong Zhang PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 140d1c53f1SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 150d1c53f1SHong Zhang void *fctx=coloring->fctx; 160d1c53f1SHong Zhang PetscInt ctype=coloring->ctype,nxloc,nrows_k; 170d1c53f1SHong Zhang PetscScalar *valaddr; 180d1c53f1SHong Zhang MatEntry *Jentry=coloring->matentry; 190df34763SHong Zhang MatEntry2 *Jentry2=coloring->matentry2; 200d1c53f1SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 210d1c53f1SHong Zhang PetscInt bs=J->rmap->bs; 220d1c53f1SHong Zhang 230d1c53f1SHong Zhang PetscFunctionBegin; 245f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(x1,PETSC_TRUE)); 250d1c53f1SHong Zhang /* (1) Set w1 = F(x1) */ 260d1c53f1SHong Zhang if (!coloring->fset) { 275f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventBegin(MAT_FDColoringFunction,coloring,0,0,0)); 285f80ce2aSJacob Faibussowitsch CHKERRQ((*f)(sctx,x1,w1,fctx)); 295f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventEnd(MAT_FDColoringFunction,coloring,0,0,0)); 300d1c53f1SHong Zhang } else { 310d1c53f1SHong Zhang coloring->fset = PETSC_FALSE; 320d1c53f1SHong Zhang } 330d1c53f1SHong Zhang 340d1c53f1SHong Zhang /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 355f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetLocalSize(x1,&nxloc)); 360d1c53f1SHong Zhang if (coloring->htype[0] == 'w') { 370d1c53f1SHong Zhang /* vscale = dx is a constant scalar */ 385f80ce2aSJacob Faibussowitsch CHKERRQ(VecNorm(x1,NORM_2,&unorm)); 390d1c53f1SHong Zhang dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 400d1c53f1SHong Zhang } else { 415f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(x1,&xx)); 425f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(vscale,&vscale_array)); 430d1c53f1SHong Zhang for (col=0; col<nxloc; col++) { 440d1c53f1SHong Zhang dx = xx[col]; 450d1c53f1SHong Zhang if (PetscAbsScalar(dx) < umin) { 460d1c53f1SHong Zhang if (PetscRealPart(dx) >= 0.0) dx = umin; 470d1c53f1SHong Zhang else if (PetscRealPart(dx) < 0.0) dx = -umin; 480d1c53f1SHong Zhang } 490d1c53f1SHong Zhang dx *= epsilon; 500d1c53f1SHong Zhang vscale_array[col] = 1.0/dx; 510d1c53f1SHong Zhang } 525f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(x1,&xx)); 535f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(vscale,&vscale_array)); 540d1c53f1SHong Zhang } 55684f2004SHong Zhang if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 565f80ce2aSJacob Faibussowitsch CHKERRQ(VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD)); 575f80ce2aSJacob Faibussowitsch CHKERRQ(VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD)); 580d1c53f1SHong Zhang } 590d1c53f1SHong Zhang 600d1c53f1SHong Zhang /* (3) Loop over each color */ 610d1c53f1SHong Zhang if (!coloring->w3) { 625f80ce2aSJacob Faibussowitsch CHKERRQ(VecDuplicate(x1,&coloring->w3)); 63ce911d59SRichard Tran Mills /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */ 645f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(coloring->w3,PETSC_TRUE)); 655f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3)); 660d1c53f1SHong Zhang } 670d1c53f1SHong Zhang w3 = coloring->w3; 680d1c53f1SHong Zhang 695f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetOwnershipRange(x1,&cstart,&cend)); /* used by ghosted vscale */ 700d1c53f1SHong Zhang if (vscale) { 715f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(vscale,&vscale_array)); 720d1c53f1SHong Zhang } 730d1c53f1SHong Zhang nz = 0; 740d1c53f1SHong Zhang for (k=0; k<ncolors; k++) { 750d1c53f1SHong Zhang coloring->currentcolor = k; 760d1c53f1SHong Zhang 770d1c53f1SHong Zhang /* 780d1c53f1SHong Zhang (3-1) Loop over each column associated with color 790d1c53f1SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 800d1c53f1SHong Zhang */ 815f80ce2aSJacob Faibussowitsch CHKERRQ(VecCopy(x1,w3)); 820d1c53f1SHong Zhang dy_i = dy; 830d1c53f1SHong Zhang for (i=0; i<bs; i++) { /* Loop over a block of columns */ 845f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(w3,&w3_array)); 850d1c53f1SHong Zhang if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 860d1c53f1SHong Zhang if (coloring->htype[0] == 'w') { 870d1c53f1SHong Zhang for (l=0; l<ncolumns[k]; l++) { 880d1c53f1SHong Zhang col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 890d1c53f1SHong Zhang w3_array[col] += 1.0/dx; 90684f2004SHong Zhang if (i) w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */ 910d1c53f1SHong Zhang } 920d1c53f1SHong Zhang } else { /* htype == 'ds' */ 930d1c53f1SHong Zhang vscale_array -= cstart; /* shift pointer so global index can be used */ 940d1c53f1SHong Zhang for (l=0; l<ncolumns[k]; l++) { 95f8c2866eSHong Zhang col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 960d1c53f1SHong Zhang w3_array[col] += 1.0/vscale_array[col]; 97684f2004SHong Zhang if (i) w3_array[col-1] -= 1.0/vscale_array[col-1]; /* resume original w3[col-1] */ 980d1c53f1SHong Zhang } 990d1c53f1SHong Zhang vscale_array += cstart; 1000d1c53f1SHong Zhang } 1010d1c53f1SHong Zhang if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 1025f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(w3,&w3_array)); 1030d1c53f1SHong Zhang 1040d1c53f1SHong Zhang /* 1050d1c53f1SHong Zhang (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 1060d1c53f1SHong Zhang w2 = F(x1 + dx) - F(x1) 1070d1c53f1SHong Zhang */ 1085f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0)); 1095f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(w2,dy_i)); /* place w2 to the array dy_i */ 1105f80ce2aSJacob Faibussowitsch CHKERRQ((*f)(sctx,w3,w2,fctx)); 1115f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0)); 1125f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(w2,-1.0,w1)); 1135f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(w2)); 1140d1c53f1SHong Zhang dy_i += nxloc; /* points to dy+i*nxloc */ 1150d1c53f1SHong Zhang } 1160d1c53f1SHong Zhang 1170d1c53f1SHong Zhang /* 1180d1c53f1SHong Zhang (3-3) Loop over rows of vector, putting results into Jacobian matrix 1190d1c53f1SHong Zhang */ 1200d1c53f1SHong Zhang nrows_k = nrows[k]; 1210d1c53f1SHong Zhang if (coloring->htype[0] == 'w') { 1220d1c53f1SHong Zhang for (l=0; l<nrows_k; l++) { 1230df34763SHong Zhang row = bs*Jentry2[nz].row; /* local row index */ 1240df34763SHong Zhang valaddr = Jentry2[nz++].valaddr; 1250d1c53f1SHong Zhang spidx = 0; 1260d1c53f1SHong Zhang dy_i = dy; 1270d1c53f1SHong Zhang for (i=0; i<bs; i++) { /* column of the block */ 1280d1c53f1SHong Zhang for (j=0; j<bs; j++) { /* row of the block */ 1290d1c53f1SHong Zhang valaddr[spidx++] = dy_i[row+j]*dx; 1300d1c53f1SHong Zhang } 131f8c2866eSHong Zhang dy_i += nxloc; /* points to dy+i*nxloc */ 1320d1c53f1SHong Zhang } 1330d1c53f1SHong Zhang } 1340d1c53f1SHong Zhang } else { /* htype == 'ds' */ 1350d1c53f1SHong Zhang for (l=0; l<nrows_k; l++) { 1360d1c53f1SHong Zhang row = bs*Jentry[nz].row; /* local row index */ 1370d1c53f1SHong Zhang col = bs*Jentry[nz].col; /* local column index */ 138684f2004SHong Zhang valaddr = Jentry[nz++].valaddr; 139f8c2866eSHong Zhang spidx = 0; 140f8c2866eSHong Zhang dy_i = dy; 141f8c2866eSHong Zhang for (i=0; i<bs; i++) { /* column of the block */ 142f8c2866eSHong Zhang for (j=0; j<bs; j++) { /* row of the block */ 143f8c2866eSHong Zhang valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i]; 144f8c2866eSHong Zhang } 145f8c2866eSHong Zhang dy_i += nxloc; /* points to dy+i*nxloc */ 146f8c2866eSHong Zhang } 1470d1c53f1SHong Zhang } 1480d1c53f1SHong Zhang } 1490d1c53f1SHong Zhang } 1505f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1515f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1520d1c53f1SHong Zhang if (vscale) { 1535f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(vscale,&vscale_array)); 1540d1c53f1SHong Zhang } 1550d1c53f1SHong Zhang 1560d1c53f1SHong Zhang coloring->currentcolor = -1; 1575f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(x1,PETSC_FALSE)); 1580d1c53f1SHong Zhang PetscFunctionReturn(0); 1590d1c53f1SHong Zhang } 160a64fbb32SBarry Smith 161531c7667SBarry Smith /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */ 162d1e9a80fSBarry Smith PetscErrorCode MatFDColoringApply_AIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx) 163fcd7ac73SHong Zhang { 164fcd7ac73SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 165a2f2d239SHong Zhang PetscInt k,cstart,cend,l,row,col,nz; 1665edff71fSBarry Smith PetscScalar dx=0.0,*y,*w3_array; 1675edff71fSBarry Smith const PetscScalar *xx; 168fcd7ac73SHong Zhang PetscScalar *vscale_array; 169fcd7ac73SHong Zhang PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 1708bc97078SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 171fcd7ac73SHong Zhang void *fctx=coloring->fctx; 17291e7fa0fSBarry Smith ISColoringType ctype=coloring->ctype; 17391e7fa0fSBarry Smith PetscInt nxloc,nrows_k; 174573f477fSHong Zhang MatEntry *Jentry=coloring->matentry; 1750df34763SHong Zhang MatEntry2 *Jentry2=coloring->matentry2; 1768bc97078SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 177b294de21SRichard Tran Mills PetscBool alreadyboundtocpu; 178fcd7ac73SHong Zhang 179fcd7ac73SHong Zhang PetscFunctionBegin; 1805f80ce2aSJacob Faibussowitsch CHKERRQ(VecBoundToCPU(x1,&alreadyboundtocpu)); 1815f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(x1,PETSC_TRUE)); 1822c71b3e2SJacob Faibussowitsch PetscCheckFalse((ctype == IS_COLORING_LOCAL) && (J->ops->fdcoloringapply == MatFDColoringApply_AIJ),PetscObjectComm((PetscObject)J),PETSC_ERR_SUP,"Must call MatColoringUseDM() with IS_COLORING_LOCAL"); 1838bc97078SHong Zhang /* (1) Set w1 = F(x1) */ 184fcd7ac73SHong Zhang if (!coloring->fset) { 1855f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0)); 1865f80ce2aSJacob Faibussowitsch CHKERRQ((*f)(sctx,x1,w1,fctx)); 1875f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0)); 188fcd7ac73SHong Zhang } else { 189fcd7ac73SHong Zhang coloring->fset = PETSC_FALSE; 190fcd7ac73SHong Zhang } 191fcd7ac73SHong Zhang 1928bc97078SHong Zhang /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 193f6af9589SHong Zhang if (coloring->htype[0] == 'w') { 194684f2004SHong Zhang /* vscale = 1./dx is a constant scalar */ 1955f80ce2aSJacob Faibussowitsch CHKERRQ(VecNorm(x1,NORM_2,&unorm)); 196c53567a0SHong Zhang dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 19770e7395fSHong Zhang } else { 1985f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetLocalSize(x1,&nxloc)); 1995f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArrayRead(x1,&xx)); 2005f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(vscale,&vscale_array)); 20174d3cef9SHong Zhang for (col=0; col<nxloc; col++) { 202fcd7ac73SHong Zhang dx = xx[col]; 20374d3cef9SHong Zhang if (PetscAbsScalar(dx) < umin) { 20474d3cef9SHong Zhang if (PetscRealPart(dx) >= 0.0) dx = umin; 20574d3cef9SHong Zhang else if (PetscRealPart(dx) < 0.0) dx = -umin; 20674d3cef9SHong Zhang } 207fcd7ac73SHong Zhang dx *= epsilon; 20874d3cef9SHong Zhang vscale_array[col] = 1.0/dx; 209f6af9589SHong Zhang } 2105f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArrayRead(x1,&xx)); 2115f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(vscale,&vscale_array)); 21270e7395fSHong Zhang } 213684f2004SHong Zhang if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 2145f80ce2aSJacob Faibussowitsch CHKERRQ(VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD)); 2155f80ce2aSJacob Faibussowitsch CHKERRQ(VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD)); 216fcd7ac73SHong Zhang } 217fcd7ac73SHong Zhang 2188bc97078SHong Zhang /* (3) Loop over each color */ 2198bc97078SHong Zhang if (!coloring->w3) { 2205f80ce2aSJacob Faibussowitsch CHKERRQ(VecDuplicate(x1,&coloring->w3)); 2215f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3)); 2228bc97078SHong Zhang } 2238bc97078SHong Zhang w3 = coloring->w3; 224fcd7ac73SHong Zhang 2255f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetOwnershipRange(x1,&cstart,&cend)); /* used by ghosted vscale */ 2269e917edbSHong Zhang if (vscale) { 2275f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(vscale,&vscale_array)); 2289e917edbSHong Zhang } 2298bc97078SHong Zhang nz = 0; 230e2c857f8SHong Zhang 231b3e1f37bSHong Zhang if (coloring->bcols > 1) { /* use blocked insertion of Jentry */ 232b3e1f37bSHong Zhang PetscInt i,m=J->rmap->n,nbcols,bcols=coloring->bcols; 233e2c857f8SHong Zhang PetscScalar *dy=coloring->dy,*dy_k; 234e2c857f8SHong Zhang 235e2c857f8SHong Zhang nbcols = 0; 236e2c857f8SHong Zhang for (k=0; k<ncolors; k+=bcols) { 237e2c857f8SHong Zhang 238e2c857f8SHong Zhang /* 239e2c857f8SHong Zhang (3-1) Loop over each column associated with color 240e2c857f8SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 241e2c857f8SHong Zhang */ 242e2c857f8SHong Zhang 243e2c857f8SHong Zhang dy_k = dy; 244e2c857f8SHong Zhang if (k + bcols > ncolors) bcols = ncolors - k; 245e2c857f8SHong Zhang for (i=0; i<bcols; i++) { 246ceef8a49SBarry Smith coloring->currentcolor = k+i; 247e2c857f8SHong Zhang 2485f80ce2aSJacob Faibussowitsch CHKERRQ(VecCopy(x1,w3)); 2495f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(w3,&w3_array)); 250e2c857f8SHong Zhang if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 251e2c857f8SHong Zhang if (coloring->htype[0] == 'w') { 252e2c857f8SHong Zhang for (l=0; l<ncolumns[k+i]; l++) { 253e2c857f8SHong Zhang col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */ 254e2c857f8SHong Zhang w3_array[col] += 1.0/dx; 255e2c857f8SHong Zhang } 256e2c857f8SHong Zhang } else { /* htype == 'ds' */ 257e2c857f8SHong Zhang vscale_array -= cstart; /* shift pointer so global index can be used */ 258e2c857f8SHong Zhang for (l=0; l<ncolumns[k+i]; l++) { 259e2c857f8SHong Zhang col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */ 260e2c857f8SHong Zhang w3_array[col] += 1.0/vscale_array[col]; 261e2c857f8SHong Zhang } 262e2c857f8SHong Zhang vscale_array += cstart; 263e2c857f8SHong Zhang } 264e2c857f8SHong Zhang if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 2655f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(w3,&w3_array)); 266e2c857f8SHong Zhang 267e2c857f8SHong Zhang /* 268e2c857f8SHong Zhang (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 269e2c857f8SHong Zhang w2 = F(x1 + dx) - F(x1) 270e2c857f8SHong Zhang */ 2715f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0)); 2725f80ce2aSJacob Faibussowitsch CHKERRQ(VecPlaceArray(w2,dy_k)); /* place w2 to the array dy_i */ 2735f80ce2aSJacob Faibussowitsch CHKERRQ((*f)(sctx,w3,w2,fctx)); 2745f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0)); 2755f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(w2,-1.0,w1)); 2765f80ce2aSJacob Faibussowitsch CHKERRQ(VecResetArray(w2)); 277e2c857f8SHong Zhang dy_k += m; /* points to dy+i*nxloc */ 278e2c857f8SHong Zhang } 279e2c857f8SHong Zhang 280e2c857f8SHong Zhang /* 281e2c857f8SHong Zhang (3-3) Loop over block rows of vector, putting results into Jacobian matrix 282e2c857f8SHong Zhang */ 283d880da65SHong Zhang nrows_k = nrows[nbcols++]; 284d880da65SHong Zhang 285e2c857f8SHong Zhang if (coloring->htype[0] == 'w') { 286e2c857f8SHong Zhang for (l=0; l<nrows_k; l++) { 2870df34763SHong Zhang row = Jentry2[nz].row; /* local row index */ 2885059b303SJunchao Zhang /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in 2895059b303SJunchao Zhang another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html 2905059b303SJunchao Zhang */ 2915059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX) 2925059b303SJunchao Zhang PetscScalar *tmp = Jentry2[nz].valaddr; 2935059b303SJunchao Zhang *tmp = dy[row]*dx; 2945059b303SJunchao Zhang #else 2955059b303SJunchao Zhang *(Jentry2[nz].valaddr) = dy[row]*dx; 2965059b303SJunchao Zhang #endif 2975059b303SJunchao Zhang nz++; 298e2c857f8SHong Zhang } 299e2c857f8SHong Zhang } else { /* htype == 'ds' */ 300e2c857f8SHong Zhang for (l=0; l<nrows_k; l++) { 301e2c857f8SHong Zhang row = Jentry[nz].row; /* local row index */ 3025059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */ 3035059b303SJunchao Zhang PetscScalar *tmp = Jentry[nz].valaddr; 3045059b303SJunchao Zhang *tmp = dy[row]*vscale_array[Jentry[nz].col]; 3055059b303SJunchao Zhang #else 306d880da65SHong Zhang *(Jentry[nz].valaddr) = dy[row]*vscale_array[Jentry[nz].col]; 3075059b303SJunchao Zhang #endif 308e2c857f8SHong Zhang nz++; 309e2c857f8SHong Zhang } 310e2c857f8SHong Zhang } 311e2c857f8SHong Zhang } 312b3e1f37bSHong Zhang } else { /* bcols == 1 */ 3138bc97078SHong Zhang for (k=0; k<ncolors; k++) { 3148bc97078SHong Zhang coloring->currentcolor = k; 3159e917edbSHong Zhang 316fcd7ac73SHong Zhang /* 3178bc97078SHong Zhang (3-1) Loop over each column associated with color 318f6af9589SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 319fcd7ac73SHong Zhang */ 3205f80ce2aSJacob Faibussowitsch CHKERRQ(VecCopy(x1,w3)); 3215f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(w3,&w3_array)); 322a2f2d239SHong Zhang if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 323b039d6c4SHong Zhang if (coloring->htype[0] == 'w') { 3248bc97078SHong Zhang for (l=0; l<ncolumns[k]; l++) { 325f6af9589SHong Zhang col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 3269e917edbSHong Zhang w3_array[col] += 1.0/dx; 3279e917edbSHong Zhang } 328b039d6c4SHong Zhang } else { /* htype == 'ds' */ 329a2f2d239SHong Zhang vscale_array -= cstart; /* shift pointer so global index can be used */ 330b039d6c4SHong Zhang for (l=0; l<ncolumns[k]; l++) { 331b039d6c4SHong Zhang col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 332a2f2d239SHong Zhang w3_array[col] += 1.0/vscale_array[col]; 33370e7395fSHong Zhang } 334a2f2d239SHong Zhang vscale_array += cstart; 335fcd7ac73SHong Zhang } 336a2f2d239SHong Zhang if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 3375f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(w3,&w3_array)); 3389e917edbSHong Zhang 339fcd7ac73SHong Zhang /* 3408bc97078SHong Zhang (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 341fcd7ac73SHong Zhang w2 = F(x1 + dx) - F(x1) 342fcd7ac73SHong Zhang */ 3435f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0)); 3445f80ce2aSJacob Faibussowitsch CHKERRQ((*f)(sctx,w3,w2,fctx)); 3455f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0)); 3465f80ce2aSJacob Faibussowitsch CHKERRQ(VecAXPY(w2,-1.0,w1)); 3479e917edbSHong Zhang 3488bc97078SHong Zhang /* 3498bc97078SHong Zhang (3-3) Loop over rows of vector, putting results into Jacobian matrix 3508bc97078SHong Zhang */ 351c53567a0SHong Zhang nrows_k = nrows[k]; 3525f80ce2aSJacob Faibussowitsch CHKERRQ(VecGetArray(w2,&y)); 353b039d6c4SHong Zhang if (coloring->htype[0] == 'w') { 354c53567a0SHong Zhang for (l=0; l<nrows_k; l++) { 3550df34763SHong Zhang row = Jentry2[nz].row; /* local row index */ 3565059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */ 3575059b303SJunchao Zhang PetscScalar *tmp = Jentry2[nz].valaddr; 3585059b303SJunchao Zhang *tmp = y[row]*dx; 3595059b303SJunchao Zhang #else 3605059b303SJunchao Zhang *(Jentry2[nz].valaddr) = y[row]*dx; 3615059b303SJunchao Zhang #endif 3625059b303SJunchao Zhang nz++; 3639e917edbSHong Zhang } 364b039d6c4SHong Zhang } else { /* htype == 'ds' */ 365c53567a0SHong Zhang for (l=0; l<nrows_k; l++) { 366b039d6c4SHong Zhang row = Jentry[nz].row; /* local row index */ 3675059b303SJunchao Zhang #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */ 3685059b303SJunchao Zhang PetscScalar *tmp = Jentry[nz].valaddr; 3695059b303SJunchao Zhang *tmp = y[row]*vscale_array[Jentry[nz].col]; 3705059b303SJunchao Zhang #else 371b039d6c4SHong Zhang *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col]; 3725059b303SJunchao Zhang #endif 373573f477fSHong Zhang nz++; 374fcd7ac73SHong Zhang } 375b039d6c4SHong Zhang } 3765f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(w2,&y)); 3778bc97078SHong Zhang } 378b3e1f37bSHong Zhang } 379b3e1f37bSHong Zhang 380e2cf4d64SStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 381c70f7ee4SJunchao Zhang if (J->offloadmask != PETSC_OFFLOAD_UNALLOCATED) J->offloadmask = PETSC_OFFLOAD_CPU; 382e2cf4d64SStefano Zampini #endif 3835f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 3845f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 3859e917edbSHong Zhang if (vscale) { 3865f80ce2aSJacob Faibussowitsch CHKERRQ(VecRestoreArray(vscale,&vscale_array)); 3879e917edbSHong Zhang } 388fcd7ac73SHong Zhang coloring->currentcolor = -1; 3895f80ce2aSJacob Faibussowitsch if (!alreadyboundtocpu) CHKERRQ(VecBindToCPU(x1,PETSC_FALSE)); 390fcd7ac73SHong Zhang PetscFunctionReturn(0); 391fcd7ac73SHong Zhang } 392fcd7ac73SHong Zhang 393f86b9fbaSHong Zhang PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 394fcd7ac73SHong Zhang { 395fcd7ac73SHong Zhang PetscMPIInt size,*ncolsonproc,*disp,nn; 396e3225e9dSHong Zhang PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb; 39772c15787SHong Zhang const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL; 398071fcb05SBarry Smith PetscInt nis=iscoloring->n,nctot,*cols,tmp = 0; 399fcd7ac73SHong Zhang ISLocalToGlobalMapping map=mat->cmap->mapping; 400a8971b87SHong Zhang PetscInt ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx; 4010d1c53f1SHong Zhang Mat A,B; 402e3225e9dSHong Zhang PetscScalar *A_val,*B_val,**valaddrhit; 403573f477fSHong Zhang MatEntry *Jentry; 4040df34763SHong Zhang MatEntry2 *Jentry2; 405d4002b98SHong Zhang PetscBool isBAIJ,isSELL; 406531e53bdSHong Zhang PetscInt bcols=c->bcols; 4070d1c53f1SHong Zhang #if defined(PETSC_USE_CTABLE) 4080d1c53f1SHong Zhang PetscTable colmap=NULL; 4090d1c53f1SHong Zhang #else 4100d1c53f1SHong Zhang PetscInt *colmap=NULL; /* local col number of off-diag col */ 4110d1c53f1SHong Zhang #endif 412fcd7ac73SHong Zhang 413fcd7ac73SHong Zhang PetscFunctionBegin; 4145bdb020cSBarry Smith if (ctype == IS_COLORING_LOCAL) { 415*28b400f6SJacob Faibussowitsch PetscCheck(map,PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 4165f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingGetIndices(map,<og)); 41772c15787SHong Zhang } 418fcd7ac73SHong Zhang 4195f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetBlockSize(mat,&bs)); 4205f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ)); 4215f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATMPISELL,&isSELL)); 422e3225e9dSHong Zhang if (isBAIJ) { 423195687c5SHong Zhang Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 4246c145f34SHong Zhang Mat_SeqBAIJ *spA,*spB; 425195687c5SHong Zhang A = baij->A; spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a; 426195687c5SHong Zhang B = baij->B; spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a; 4270d1c53f1SHong Zhang nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 428195687c5SHong Zhang if (!baij->colmap) { 4295f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateColmap_MPIBAIJ_Private(mat)); 4300d1c53f1SHong Zhang } 431097e26fcSJed Brown colmap = baij->colmap; 4325f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL)); 4335f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL)); 434195687c5SHong Zhang 435195687c5SHong Zhang if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 436195687c5SHong Zhang PetscInt *garray; 4375f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(B->cmap->n,&garray)); 438195687c5SHong Zhang for (i=0; i<baij->B->cmap->n/bs; i++) { 439195687c5SHong Zhang for (j=0; j<bs; j++) { 440195687c5SHong Zhang garray[i*bs+j] = bs*baij->garray[i]+j; 441195687c5SHong Zhang } 442195687c5SHong Zhang } 4435f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale)); 4445f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(c->vscale,PETSC_TRUE)); 4455f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(garray)); 446195687c5SHong Zhang } 447d4002b98SHong Zhang } else if (isSELL) { 448d4002b98SHong Zhang Mat_MPISELL *sell=(Mat_MPISELL*)mat->data; 449d4002b98SHong Zhang Mat_SeqSELL *spA,*spB; 450d4002b98SHong Zhang A = sell->A; spA = (Mat_SeqSELL*)A->data; A_val = spA->val; 451d4002b98SHong Zhang B = sell->B; spB = (Mat_SeqSELL*)B->data; B_val = spB->val; 452f4b713efSHong Zhang nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 453d4002b98SHong Zhang if (!sell->colmap) { 454f4b713efSHong Zhang /* Allow access to data structures of local part of matrix 455f4b713efSHong Zhang - creates aij->colmap which maps global column number to local number in part B */ 4565f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateColmap_MPISELL_Private(mat)); 457f4b713efSHong Zhang } 458d4002b98SHong Zhang colmap = sell->colmap; 4595f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetColumnIJ_SeqSELL_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL)); 4605f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetColumnIJ_SeqSELL_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL)); 461f4b713efSHong Zhang 462f4b713efSHong Zhang bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 463f4b713efSHong Zhang 464f4b713efSHong Zhang if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 4655f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,sell->garray,&c->vscale)); 4665f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(c->vscale,PETSC_TRUE)); 467f4b713efSHong Zhang } 468e3225e9dSHong Zhang } else { 469e3225e9dSHong Zhang Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 470e3225e9dSHong Zhang Mat_SeqAIJ *spA,*spB; 471e3225e9dSHong Zhang A = aij->A; spA = (Mat_SeqAIJ*)A->data; A_val = spA->a; 472e3225e9dSHong Zhang B = aij->B; spB = (Mat_SeqAIJ*)B->data; B_val = spB->a; 473e3225e9dSHong Zhang nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 474e3225e9dSHong Zhang if (!aij->colmap) { 475e3225e9dSHong Zhang /* Allow access to data structures of local part of matrix 476e3225e9dSHong Zhang - creates aij->colmap which maps global column number to local number in part B */ 4775f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateColmap_MPIAIJ_Private(mat)); 478e3225e9dSHong Zhang } 479097e26fcSJed Brown colmap = aij->colmap; 4805f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL)); 4815f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL)); 482e3225e9dSHong Zhang 483e3225e9dSHong Zhang bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 484195687c5SHong Zhang 485195687c5SHong Zhang if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 4865f80ce2aSJacob Faibussowitsch CHKERRQ(VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale)); 4875f80ce2aSJacob Faibussowitsch CHKERRQ(VecBindToCPU(c->vscale,PETSC_TRUE)); 488195687c5SHong Zhang } 4890d1c53f1SHong Zhang } 4900d1c53f1SHong Zhang 4910d1c53f1SHong Zhang m = mat->rmap->n/bs; 4920d1c53f1SHong Zhang cstart = mat->cmap->rstart/bs; 4930d1c53f1SHong Zhang cend = mat->cmap->rend/bs; 494fcd7ac73SHong Zhang 4955f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(nis,&c->ncolumns,nis,&c->columns)); 4965f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nis,&c->nrows)); 4975f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt))); 498fcd7ac73SHong Zhang 4990df34763SHong Zhang if (c->htype[0] == 'd') { 5005f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nz,&Jentry)); 5015f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry))); 5028bc97078SHong Zhang c->matentry = Jentry; 5030df34763SHong Zhang } else if (c->htype[0] == 'w') { 5045f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nz,&Jentry2)); 5055f80ce2aSJacob Faibussowitsch CHKERRQ(PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2))); 5060df34763SHong Zhang c->matentry2 = Jentry2; 5070df34763SHong Zhang } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported"); 508a774a6f1SHong Zhang 5095f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(m+1,&rowhit,m+1,&valaddrhit)); 510fcd7ac73SHong Zhang nz = 0; 5115f80ce2aSJacob Faibussowitsch CHKERRQ(ISColoringGetIS(iscoloring,PETSC_OWN_POINTER, PETSC_IGNORE,&c->isa)); 512071fcb05SBarry Smith 513071fcb05SBarry Smith if (ctype == IS_COLORING_GLOBAL) { 5145f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size)); 5155f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(size,&ncolsonproc,size,&disp)); 516071fcb05SBarry Smith } 517071fcb05SBarry Smith 518d3825b63SHong Zhang for (i=0; i<nis; i++) { /* for each local color */ 5195f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetLocalSize(c->isa[i],&n)); 5205f80ce2aSJacob Faibussowitsch CHKERRQ(ISGetIndices(c->isa[i],&is)); 521fcd7ac73SHong Zhang 522fcd7ac73SHong Zhang c->ncolumns[i] = n; /* local number of columns of this color on this process */ 523071fcb05SBarry Smith c->columns[i] = (PetscInt*)is; 524fcd7ac73SHong Zhang 525fcd7ac73SHong Zhang if (ctype == IS_COLORING_GLOBAL) { 526d3825b63SHong Zhang /* Determine nctot, the total (parallel) number of columns of this color */ 527d3825b63SHong Zhang /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ 5285f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMPIIntCast(n,&nn)); 5295f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat))); 530fcd7ac73SHong Zhang nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 531fcd7ac73SHong Zhang if (!nctot) { 5325f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n")); 533fcd7ac73SHong Zhang } 534fcd7ac73SHong Zhang 535fcd7ac73SHong Zhang disp[0] = 0; 536fcd7ac73SHong Zhang for (j=1; j<size; j++) { 537fcd7ac73SHong Zhang disp[j] = disp[j-1] + ncolsonproc[j-1]; 538fcd7ac73SHong Zhang } 539fcd7ac73SHong Zhang 540d3825b63SHong Zhang /* Get cols, the complete list of columns for this color on each process */ 5415f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(nctot+1,&cols)); 5425f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat))); 5435bdb020cSBarry Smith } else if (ctype == IS_COLORING_LOCAL) { 544fcd7ac73SHong Zhang /* Determine local number of columns of this color on this process, including ghost points */ 545fcd7ac73SHong Zhang nctot = n; 546071fcb05SBarry Smith cols = (PetscInt*)is; 547fcd7ac73SHong Zhang } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 548fcd7ac73SHong Zhang 5491b97d346SHong Zhang /* Mark all rows affect by these columns */ 5505f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArrayzero(rowhit,m)); 5510d1c53f1SHong Zhang bs2 = bs*bs; 5524b2e90caSHong Zhang nrows_i = 0; 5531b97d346SHong Zhang for (j=0; j<nctot; j++) { /* loop over columns*/ 5545bdb020cSBarry Smith if (ctype == IS_COLORING_LOCAL) { 555fcd7ac73SHong Zhang col = ltog[cols[j]]; 556fcd7ac73SHong Zhang } else { 557fcd7ac73SHong Zhang col = cols[j]; 558fcd7ac73SHong Zhang } 559e3225e9dSHong Zhang if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */ 560071fcb05SBarry Smith tmp = A_ci[col-cstart]; 561071fcb05SBarry Smith row = A_cj + tmp; 562071fcb05SBarry Smith nrows = A_ci[col-cstart+1] - tmp; 5634b2e90caSHong Zhang nrows_i += nrows; 564071fcb05SBarry Smith 565fcd7ac73SHong Zhang /* loop over columns of A marking them in rowhit */ 566d3825b63SHong Zhang for (k=0; k<nrows; k++) { 567d3825b63SHong Zhang /* set valaddrhit for part A */ 568071fcb05SBarry Smith spidx = bs2*spidxA[tmp + k]; 569e3225e9dSHong Zhang valaddrhit[*row] = &A_val[spidx]; 570a774a6f1SHong Zhang rowhit[*row++] = col - cstart + 1; /* local column index */ 571fcd7ac73SHong Zhang } 572e3225e9dSHong Zhang } else { /* column is in B, off-diagonal block of mat */ 573fcd7ac73SHong Zhang #if defined(PETSC_USE_CTABLE) 5745f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTableFind(colmap,col+1,&colb)); 575fcd7ac73SHong Zhang colb--; 576fcd7ac73SHong Zhang #else 5770d1c53f1SHong Zhang colb = colmap[col] - 1; /* local column index */ 578fcd7ac73SHong Zhang #endif 579fcd7ac73SHong Zhang if (colb == -1) { 580d3825b63SHong Zhang nrows = 0; 581fcd7ac73SHong Zhang } else { 5820d1c53f1SHong Zhang colb = colb/bs; 583071fcb05SBarry Smith tmp = B_ci[colb]; 584071fcb05SBarry Smith row = B_cj + tmp; 585071fcb05SBarry Smith nrows = B_ci[colb+1] - tmp; 586fcd7ac73SHong Zhang } 5874b2e90caSHong Zhang nrows_i += nrows; 588fcd7ac73SHong Zhang /* loop over columns of B marking them in rowhit */ 589d3825b63SHong Zhang for (k=0; k<nrows; k++) { 590d3825b63SHong Zhang /* set valaddrhit for part B */ 591071fcb05SBarry Smith spidx = bs2*spidxB[tmp + k]; 592e3225e9dSHong Zhang valaddrhit[*row] = &B_val[spidx]; 59370e7395fSHong Zhang rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ 594fcd7ac73SHong Zhang } 595fcd7ac73SHong Zhang } 596fcd7ac73SHong Zhang } 5974b2e90caSHong Zhang c->nrows[i] = nrows_i; 5988bc97078SHong Zhang 5990df34763SHong Zhang if (c->htype[0] == 'd') { 600d3825b63SHong Zhang for (j=0; j<m; j++) { 601fcd7ac73SHong Zhang if (rowhit[j]) { 602573f477fSHong Zhang Jentry[nz].row = j; /* local row index */ 603573f477fSHong Zhang Jentry[nz].col = rowhit[j] - 1; /* local column index */ 604573f477fSHong Zhang Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 605573f477fSHong Zhang nz++; 606fcd7ac73SHong Zhang } 607fcd7ac73SHong Zhang } 6080df34763SHong Zhang } else { /* c->htype == 'wp' */ 6090df34763SHong Zhang for (j=0; j<m; j++) { 6100df34763SHong Zhang if (rowhit[j]) { 6110df34763SHong Zhang Jentry2[nz].row = j; /* local row index */ 6120df34763SHong Zhang Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 6130df34763SHong Zhang nz++; 6140df34763SHong Zhang } 6150df34763SHong Zhang } 6160df34763SHong Zhang } 617071fcb05SBarry Smith if (ctype == IS_COLORING_GLOBAL) { 6185f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(cols)); 619fcd7ac73SHong Zhang } 620071fcb05SBarry Smith } 621071fcb05SBarry Smith if (ctype == IS_COLORING_GLOBAL) { 6225f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(ncolsonproc,disp)); 623071fcb05SBarry Smith } 624fcd7ac73SHong Zhang 625a8971b87SHong Zhang if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 6265f80ce2aSJacob Faibussowitsch CHKERRQ(MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz)); 627531e53bdSHong Zhang } 628531e53bdSHong Zhang 6290d1c53f1SHong Zhang if (isBAIJ) { 6305f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL)); 6315f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL)); 6325f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(bs*mat->rmap->n,&c->dy)); 633d4002b98SHong Zhang } else if (isSELL) { 6345f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreColumnIJ_SeqSELL_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL)); 6355f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreColumnIJ_SeqSELL_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL)); 6360d1c53f1SHong Zhang } else { 6375f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL)); 6385f80ce2aSJacob Faibussowitsch CHKERRQ(MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL)); 6390d1c53f1SHong Zhang } 6400d1c53f1SHong Zhang 6415f80ce2aSJacob Faibussowitsch CHKERRQ(ISColoringRestoreIS(iscoloring,PETSC_OWN_POINTER,&c->isa)); 6425f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(rowhit,valaddrhit)); 643a8971b87SHong Zhang 6445bdb020cSBarry Smith if (ctype == IS_COLORING_LOCAL) { 6455f80ce2aSJacob Faibussowitsch CHKERRQ(ISLocalToGlobalMappingRestoreIndices(map,<og)); 64672c15787SHong Zhang } 6475f80ce2aSJacob Faibussowitsch CHKERRQ(PetscInfo(c,"ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n",c->ncolors,c->brows,c->bcols)); 648fcd7ac73SHong Zhang PetscFunctionReturn(0); 649fcd7ac73SHong Zhang } 65093dfae19SHong Zhang 65193dfae19SHong Zhang PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 65293dfae19SHong Zhang { 653a8971b87SHong Zhang PetscInt bs,nis=iscoloring->n,m=mat->rmap->n; 654d4002b98SHong Zhang PetscBool isBAIJ,isSELL; 655531e53bdSHong Zhang 65693dfae19SHong Zhang PetscFunctionBegin; 657531e53bdSHong Zhang /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian; 658531e53bdSHong Zhang bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 6595f80ce2aSJacob Faibussowitsch CHKERRQ(MatGetBlockSize(mat,&bs)); 6605f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ)); 6615f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATMPISELL,&isSELL)); 662b8dfa574SBarry Smith if (isBAIJ || m == 0) { 663a8971b87SHong Zhang c->brows = m; 664a8971b87SHong Zhang c->bcols = 1; 665d4002b98SHong Zhang } else if (isSELL) { 666f4b713efSHong Zhang /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */ 667d4002b98SHong Zhang Mat_MPISELL *sell=(Mat_MPISELL*)mat->data; 668d4002b98SHong Zhang Mat_SeqSELL *spA,*spB; 669f4b713efSHong Zhang Mat A,B; 670f4b713efSHong Zhang PetscInt nz,brows,bcols; 671f4b713efSHong Zhang PetscReal mem; 672f4b713efSHong Zhang 673d4002b98SHong Zhang bs = 1; /* only bs=1 is supported for MPISELL matrix */ 674f4b713efSHong Zhang 675d4002b98SHong Zhang A = sell->A; spA = (Mat_SeqSELL*)A->data; 676d4002b98SHong Zhang B = sell->B; spB = (Mat_SeqSELL*)B->data; 677f4b713efSHong Zhang nz = spA->nz + spB->nz; /* total local nonzero entries of mat */ 678f4b713efSHong Zhang mem = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt); 679f4b713efSHong Zhang bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar))); 680f4b713efSHong Zhang brows = 1000/bcols; 681f4b713efSHong Zhang if (bcols > nis) bcols = nis; 682f4b713efSHong Zhang if (brows == 0 || brows > m) brows = m; 683f4b713efSHong Zhang c->brows = brows; 684f4b713efSHong Zhang c->bcols = bcols; 685531e53bdSHong Zhang } else { /* mpiaij matrix */ 686531e53bdSHong Zhang /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */ 687531e53bdSHong Zhang Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 688531e53bdSHong Zhang Mat_SeqAIJ *spA,*spB; 689531e53bdSHong Zhang Mat A,B; 690a8971b87SHong Zhang PetscInt nz,brows,bcols; 691531e53bdSHong Zhang PetscReal mem; 692531e53bdSHong Zhang 693531e53bdSHong Zhang bs = 1; /* only bs=1 is supported for MPIAIJ matrix */ 694531e53bdSHong Zhang 695531e53bdSHong Zhang A = aij->A; spA = (Mat_SeqAIJ*)A->data; 696531e53bdSHong Zhang B = aij->B; spB = (Mat_SeqAIJ*)B->data; 697531e53bdSHong Zhang nz = spA->nz + spB->nz; /* total local nonzero entries of mat */ 698531e53bdSHong Zhang mem = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt); 699531e53bdSHong Zhang bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar))); 700531e53bdSHong Zhang brows = 1000/bcols; 701531e53bdSHong Zhang if (bcols > nis) bcols = nis; 702531e53bdSHong Zhang if (brows == 0 || brows > m) brows = m; 703531e53bdSHong Zhang c->brows = brows; 704531e53bdSHong Zhang c->bcols = bcols; 705531e53bdSHong Zhang } 706a8971b87SHong Zhang 707a8971b87SHong Zhang c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 708a8971b87SHong Zhang c->N = mat->cmap->N/bs; 709a8971b87SHong Zhang c->m = mat->rmap->n/bs; 710a8971b87SHong Zhang c->rstart = mat->rmap->rstart/bs; 711a8971b87SHong Zhang c->ncolors = nis; 71293dfae19SHong Zhang PetscFunctionReturn(0); 71393dfae19SHong Zhang } 714bdaf1daeSBarry Smith 715bdaf1daeSBarry Smith /*@C 716bdaf1daeSBarry Smith 717bdaf1daeSBarry Smith MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc Mat 718bdaf1daeSBarry Smith 719bdaf1daeSBarry Smith Collective on J 720bdaf1daeSBarry Smith 721bdaf1daeSBarry Smith Input Parameters: 722bdaf1daeSBarry Smith + J - the sparse matrix 723bdaf1daeSBarry Smith . coloring - created with MatFDColoringCreate() and a local coloring 724bdaf1daeSBarry Smith - y - column major storage of matrix values with one color of values per column, the number of rows of y should match 725bdaf1daeSBarry Smith the number of local rows of J and the number of columns is the number of colors. 726bdaf1daeSBarry Smith 727bdaf1daeSBarry Smith Level: intermediate 728bdaf1daeSBarry Smith 7296a9b8d82SBarry Smith Notes: the matrix in compressed color format may come from an Automatic Differentiation code 730bdaf1daeSBarry Smith 731bdaf1daeSBarry Smith The code will be slightly faster if MatFDColoringSetBlockSize(coloring,PETSC_DEFAULT,nc); is called immediately after creating the coloring 732bdaf1daeSBarry Smith 733bdaf1daeSBarry Smith .seealso: MatFDColoringCreate(), ISColoring, ISColoringCreate(), ISColoringSetType(), IS_COLORING_LOCAL, MatFDColoringSetBlockSize() 734bdaf1daeSBarry Smith 735bdaf1daeSBarry Smith @*/ 736bdaf1daeSBarry Smith PetscErrorCode MatFDColoringSetValues(Mat J,MatFDColoring coloring,const PetscScalar *y) 737bdaf1daeSBarry Smith { 738bdaf1daeSBarry Smith MatEntry2 *Jentry2; 739bdaf1daeSBarry Smith PetscInt row,i,nrows_k,l,ncolors,nz = 0,bcols,nbcols = 0; 740bdaf1daeSBarry Smith const PetscInt *nrows; 741bdaf1daeSBarry Smith PetscBool eq; 742bdaf1daeSBarry Smith 743bdaf1daeSBarry Smith PetscFunctionBegin; 744bdaf1daeSBarry Smith PetscValidHeaderSpecific(J,MAT_CLASSID,1); 745bdaf1daeSBarry Smith PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2); 7465f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectCompareId((PetscObject)J,coloring->matid,&eq)); 747*28b400f6SJacob Faibussowitsch PetscCheck(eq,PetscObjectComm((PetscObject)J),PETSC_ERR_ARG_WRONG,"Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()"); 748bdaf1daeSBarry Smith Jentry2 = coloring->matentry2; 749bdaf1daeSBarry Smith nrows = coloring->nrows; 750bdaf1daeSBarry Smith ncolors = coloring->ncolors; 751bdaf1daeSBarry Smith bcols = coloring->bcols; 752bdaf1daeSBarry Smith 753bdaf1daeSBarry Smith for (i=0; i<ncolors; i+=bcols) { 754bdaf1daeSBarry Smith nrows_k = nrows[nbcols++]; 755bdaf1daeSBarry Smith for (l=0; l<nrows_k; l++) { 756bdaf1daeSBarry Smith row = Jentry2[nz].row; /* local row index */ 757bdaf1daeSBarry Smith *(Jentry2[nz++].valaddr) = y[row]; 758bdaf1daeSBarry Smith } 759bdaf1daeSBarry Smith y += bcols*coloring->m; 760bdaf1daeSBarry Smith } 7615f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 7625f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 763bdaf1daeSBarry Smith PetscFunctionReturn(0); 764bdaf1daeSBarry Smith } 765