1 /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/ 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include "src/mat/impls/baij/seq/baij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/ilu.h" 8 9 /* ------------------------------------------------------------*/ 10 /* 11 Version for when blocks are 2 by 2 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 15 int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 16 { 17 Mat C = *B; 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 19 IS isrow = b->row,isicol = b->icol; 20 int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 21 int *ajtmpold,*ajtmp,nz,row; 22 int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 23 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 24 MatScalar p1,p2,p3,p4; 25 MatScalar *ba = b->a,*aa = a->a; 26 27 PetscFunctionBegin; 28 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 29 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 30 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 31 32 for (i=0; i<n; i++) { 33 nz = bi[i+1] - bi[i]; 34 ajtmp = bj + bi[i]; 35 for (j=0; j<nz; j++) { 36 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 37 } 38 /* load in initial (unfactored row) */ 39 idx = r[i]; 40 nz = ai[idx+1] - ai[idx]; 41 ajtmpold = aj + ai[idx]; 42 v = aa + 4*ai[idx]; 43 for (j=0; j<nz; j++) { 44 x = rtmp+4*ic[ajtmpold[j]]; 45 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 46 v += 4; 47 } 48 row = *ajtmp++; 49 while (row < i) { 50 pc = rtmp + 4*row; 51 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 52 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 53 pv = ba + 4*diag_offset[row]; 54 pj = bj + diag_offset[row] + 1; 55 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 56 pc[0] = m1 = p1*x1 + p3*x2; 57 pc[1] = m2 = p2*x1 + p4*x2; 58 pc[2] = m3 = p1*x3 + p3*x4; 59 pc[3] = m4 = p2*x3 + p4*x4; 60 nz = bi[row+1] - diag_offset[row] - 1; 61 pv += 4; 62 for (j=0; j<nz; j++) { 63 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 64 x = rtmp + 4*pj[j]; 65 x[0] -= m1*x1 + m3*x2; 66 x[1] -= m2*x1 + m4*x2; 67 x[2] -= m1*x3 + m3*x4; 68 x[3] -= m2*x3 + m4*x4; 69 pv += 4; 70 } 71 PetscLogFlops(16*nz+12); 72 } 73 row = *ajtmp++; 74 } 75 /* finished row so stick it into b->a */ 76 pv = ba + 4*bi[i]; 77 pj = bj + bi[i]; 78 nz = bi[i+1] - bi[i]; 79 for (j=0; j<nz; j++) { 80 x = rtmp+4*pj[j]; 81 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 82 pv += 4; 83 } 84 /* invert diagonal block */ 85 w = ba + 4*diag_offset[i]; 86 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 87 } 88 89 ierr = PetscFree(rtmp);CHKERRQ(ierr); 90 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 91 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 92 C->factor = FACTOR_LU; 93 C->assembled = PETSC_TRUE; 94 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 95 PetscFunctionReturn(0); 96 } 97 /* 98 Version for when blocks are 2 by 2 Using natural ordering 99 */ 100 #undef __FUNCT__ 101 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 102 int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 103 { 104 Mat C = *B; 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 106 int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 107 int *ajtmpold,*ajtmp,nz,row; 108 int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 109 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 110 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 111 MatScalar *ba = b->a,*aa = a->a; 112 113 PetscFunctionBegin; 114 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 115 116 for (i=0; i<n; i++) { 117 nz = bi[i+1] - bi[i]; 118 ajtmp = bj + bi[i]; 119 for (j=0; j<nz; j++) { 120 x = rtmp+4*ajtmp[j]; 121 x[0] = x[1] = x[2] = x[3] = 0.0; 122 } 123 /* load in initial (unfactored row) */ 124 nz = ai[i+1] - ai[i]; 125 ajtmpold = aj + ai[i]; 126 v = aa + 4*ai[i]; 127 for (j=0; j<nz; j++) { 128 x = rtmp+4*ajtmpold[j]; 129 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 130 v += 4; 131 } 132 row = *ajtmp++; 133 while (row < i) { 134 pc = rtmp + 4*row; 135 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 136 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 137 pv = ba + 4*diag_offset[row]; 138 pj = bj + diag_offset[row] + 1; 139 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 140 pc[0] = m1 = p1*x1 + p3*x2; 141 pc[1] = m2 = p2*x1 + p4*x2; 142 pc[2] = m3 = p1*x3 + p3*x4; 143 pc[3] = m4 = p2*x3 + p4*x4; 144 nz = bi[row+1] - diag_offset[row] - 1; 145 pv += 4; 146 for (j=0; j<nz; j++) { 147 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 148 x = rtmp + 4*pj[j]; 149 x[0] -= m1*x1 + m3*x2; 150 x[1] -= m2*x1 + m4*x2; 151 x[2] -= m1*x3 + m3*x4; 152 x[3] -= m2*x3 + m4*x4; 153 pv += 4; 154 } 155 PetscLogFlops(16*nz+12); 156 } 157 row = *ajtmp++; 158 } 159 /* finished row so stick it into b->a */ 160 pv = ba + 4*bi[i]; 161 pj = bj + bi[i]; 162 nz = bi[i+1] - bi[i]; 163 for (j=0; j<nz; j++) { 164 x = rtmp+4*pj[j]; 165 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 166 pv += 4; 167 } 168 /* invert diagonal block */ 169 w = ba + 4*diag_offset[i]; 170 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 171 /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 172 } 173 174 ierr = PetscFree(rtmp);CHKERRQ(ierr); 175 C->factor = FACTOR_LU; 176 C->assembled = PETSC_TRUE; 177 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 178 PetscFunctionReturn(0); 179 } 180 181 /* ----------------------------------------------------------- */ 182 /* 183 Version for when blocks are 1 by 1. 184 */ 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 187 int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 188 { 189 Mat C = *B; 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 191 IS isrow = b->row,isicol = b->icol; 192 int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 193 int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 194 int *diag_offset = b->diag,diag,*pj; 195 MatScalar *pv,*v,*rtmp,multiplier,*pc; 196 MatScalar *ba = b->a,*aa = a->a; 197 198 PetscFunctionBegin; 199 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 200 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 201 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 202 203 for (i=0; i<n; i++) { 204 nz = bi[i+1] - bi[i]; 205 ajtmp = bj + bi[i]; 206 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 207 208 /* load in initial (unfactored row) */ 209 nz = ai[r[i]+1] - ai[r[i]]; 210 ajtmpold = aj + ai[r[i]]; 211 v = aa + ai[r[i]]; 212 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 213 214 row = *ajtmp++; 215 while (row < i) { 216 pc = rtmp + row; 217 if (*pc != 0.0) { 218 pv = ba + diag_offset[row]; 219 pj = bj + diag_offset[row] + 1; 220 multiplier = *pc * *pv++; 221 *pc = multiplier; 222 nz = bi[row+1] - diag_offset[row] - 1; 223 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 224 PetscLogFlops(1+2*nz); 225 } 226 row = *ajtmp++; 227 } 228 /* finished row so stick it into b->a */ 229 pv = ba + bi[i]; 230 pj = bj + bi[i]; 231 nz = bi[i+1] - bi[i]; 232 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 233 diag = diag_offset[i] - bi[i]; 234 /* check pivot entry for current row */ 235 if (pv[diag] == 0.0) { 236 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 237 } 238 pv[diag] = 1.0/pv[diag]; 239 } 240 241 ierr = PetscFree(rtmp);CHKERRQ(ierr); 242 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 243 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 244 C->factor = FACTOR_LU; 245 C->assembled = PETSC_TRUE; 246 PetscLogFlops(C->n); 247 PetscFunctionReturn(0); 248 } 249 250 251 /* ----------------------------------------------------------- */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 254 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 255 { 256 int ierr; 257 Mat C; 258 259 PetscFunctionBegin; 260 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 261 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 262 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 263 PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 264 PetscFunctionReturn(0); 265 } 266 267 268