1 /* 2 Factorization code for BAIJ format. 3 */ 4 #include "src/mat/impls/baij/seq/baij.h" 5 #include "src/inline/ilu.h" 6 7 /* ------------------------------------------------------------*/ 8 /* 9 Version for when blocks are 2 by 2 10 */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2" 13 int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B) 14 { 15 Mat C = *B; 16 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 17 IS isrow = b->row,isicol = b->icol; 18 int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 19 int *ajtmpold,*ajtmp,nz,row; 20 int *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj; 21 MatScalar *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4; 22 MatScalar p1,p2,p3,p4; 23 MatScalar *ba = b->a,*aa = a->a; 24 25 PetscFunctionBegin; 26 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 27 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 28 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 29 30 for (i=0; i<n; i++) { 31 nz = bi[i+1] - bi[i]; 32 ajtmp = bj + bi[i]; 33 for (j=0; j<nz; j++) { 34 x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0; 35 } 36 /* load in initial (unfactored row) */ 37 idx = r[i]; 38 nz = ai[idx+1] - ai[idx]; 39 ajtmpold = aj + ai[idx]; 40 v = aa + 4*ai[idx]; 41 for (j=0; j<nz; j++) { 42 x = rtmp+4*ic[ajtmpold[j]]; 43 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 44 v += 4; 45 } 46 row = *ajtmp++; 47 while (row < i) { 48 pc = rtmp + 4*row; 49 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 50 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 51 pv = ba + 4*diag_offset[row]; 52 pj = bj + diag_offset[row] + 1; 53 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 54 pc[0] = m1 = p1*x1 + p3*x2; 55 pc[1] = m2 = p2*x1 + p4*x2; 56 pc[2] = m3 = p1*x3 + p3*x4; 57 pc[3] = m4 = p2*x3 + p4*x4; 58 nz = bi[row+1] - diag_offset[row] - 1; 59 pv += 4; 60 for (j=0; j<nz; j++) { 61 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 62 x = rtmp + 4*pj[j]; 63 x[0] -= m1*x1 + m3*x2; 64 x[1] -= m2*x1 + m4*x2; 65 x[2] -= m1*x3 + m3*x4; 66 x[3] -= m2*x3 + m4*x4; 67 pv += 4; 68 } 69 PetscLogFlops(16*nz+12); 70 } 71 row = *ajtmp++; 72 } 73 /* finished row so stick it into b->a */ 74 pv = ba + 4*bi[i]; 75 pj = bj + bi[i]; 76 nz = bi[i+1] - bi[i]; 77 for (j=0; j<nz; j++) { 78 x = rtmp+4*pj[j]; 79 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 80 pv += 4; 81 } 82 /* invert diagonal block */ 83 w = ba + 4*diag_offset[i]; 84 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 85 } 86 87 ierr = PetscFree(rtmp);CHKERRQ(ierr); 88 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 89 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 90 C->factor = FACTOR_LU; 91 C->assembled = PETSC_TRUE; 92 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 93 PetscFunctionReturn(0); 94 } 95 /* 96 Version for when blocks are 2 by 2 Using natural ordering 97 */ 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering" 100 int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B) 101 { 102 Mat C = *B; 103 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 104 int ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 105 int *ajtmpold,*ajtmp,nz,row; 106 int *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj; 107 MatScalar *pv,*v,*rtmp,*pc,*w,*x; 108 MatScalar p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4; 109 MatScalar *ba = b->a,*aa = a->a; 110 111 PetscFunctionBegin; 112 ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 113 114 for (i=0; i<n; i++) { 115 nz = bi[i+1] - bi[i]; 116 ajtmp = bj + bi[i]; 117 for (j=0; j<nz; j++) { 118 x = rtmp+4*ajtmp[j]; 119 x[0] = x[1] = x[2] = x[3] = 0.0; 120 } 121 /* load in initial (unfactored row) */ 122 nz = ai[i+1] - ai[i]; 123 ajtmpold = aj + ai[i]; 124 v = aa + 4*ai[i]; 125 for (j=0; j<nz; j++) { 126 x = rtmp+4*ajtmpold[j]; 127 x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3]; 128 v += 4; 129 } 130 row = *ajtmp++; 131 while (row < i) { 132 pc = rtmp + 4*row; 133 p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3]; 134 if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) { 135 pv = ba + 4*diag_offset[row]; 136 pj = bj + diag_offset[row] + 1; 137 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 138 pc[0] = m1 = p1*x1 + p3*x2; 139 pc[1] = m2 = p2*x1 + p4*x2; 140 pc[2] = m3 = p1*x3 + p3*x4; 141 pc[3] = m4 = p2*x3 + p4*x4; 142 nz = bi[row+1] - diag_offset[row] - 1; 143 pv += 4; 144 for (j=0; j<nz; j++) { 145 x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3]; 146 x = rtmp + 4*pj[j]; 147 x[0] -= m1*x1 + m3*x2; 148 x[1] -= m2*x1 + m4*x2; 149 x[2] -= m1*x3 + m3*x4; 150 x[3] -= m2*x3 + m4*x4; 151 pv += 4; 152 } 153 PetscLogFlops(16*nz+12); 154 } 155 row = *ajtmp++; 156 } 157 /* finished row so stick it into b->a */ 158 pv = ba + 4*bi[i]; 159 pj = bj + bi[i]; 160 nz = bi[i+1] - bi[i]; 161 for (j=0; j<nz; j++) { 162 x = rtmp+4*pj[j]; 163 pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3]; 164 pv += 4; 165 } 166 /* invert diagonal block */ 167 w = ba + 4*diag_offset[i]; 168 ierr = Kernel_A_gets_inverse_A_2(w);CHKERRQ(ierr); 169 /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/ 170 } 171 172 ierr = PetscFree(rtmp);CHKERRQ(ierr); 173 C->factor = FACTOR_LU; 174 C->assembled = PETSC_TRUE; 175 PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */ 176 PetscFunctionReturn(0); 177 } 178 179 /* ----------------------------------------------------------- */ 180 /* 181 Version for when blocks are 1 by 1. 182 */ 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_1" 185 int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B) 186 { 187 Mat C = *B; 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data; 189 IS isrow = b->row,isicol = b->icol; 190 int *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j; 191 int *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j; 192 int *diag_offset = b->diag,diag,*pj; 193 MatScalar *pv,*v,*rtmp,multiplier,*pc; 194 MatScalar *ba = b->a,*aa = a->a; 195 196 PetscFunctionBegin; 197 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 198 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 199 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 200 201 for (i=0; i<n; i++) { 202 nz = bi[i+1] - bi[i]; 203 ajtmp = bj + bi[i]; 204 for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0; 205 206 /* load in initial (unfactored row) */ 207 nz = ai[r[i]+1] - ai[r[i]]; 208 ajtmpold = aj + ai[r[i]]; 209 v = aa + ai[r[i]]; 210 for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j]; 211 212 row = *ajtmp++; 213 while (row < i) { 214 pc = rtmp + row; 215 if (*pc != 0.0) { 216 pv = ba + diag_offset[row]; 217 pj = bj + diag_offset[row] + 1; 218 multiplier = *pc * *pv++; 219 *pc = multiplier; 220 nz = bi[row+1] - diag_offset[row] - 1; 221 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 222 PetscLogFlops(1+2*nz); 223 } 224 row = *ajtmp++; 225 } 226 /* finished row so stick it into b->a */ 227 pv = ba + bi[i]; 228 pj = bj + bi[i]; 229 nz = bi[i+1] - bi[i]; 230 for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];} 231 diag = diag_offset[i] - bi[i]; 232 /* check pivot entry for current row */ 233 if (pv[diag] != 0.0) { 234 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot"); 235 } 236 pv[diag] = 1.0/pv[diag]; 237 } 238 239 ierr = PetscFree(rtmp);CHKERRQ(ierr); 240 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 241 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 242 C->factor = FACTOR_LU; 243 C->assembled = PETSC_TRUE; 244 PetscLogFlops(C->n); 245 PetscFunctionReturn(0); 246 } 247 248 249 /* ----------------------------------------------------------- */ 250 #undef __FUNCT__ 251 #define __FUNCT__ "MatLUFactor_SeqBAIJ" 252 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 253 { 254 int ierr; 255 Mat C; 256 257 PetscFunctionBegin; 258 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 259 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 260 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 261 PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol); 262 PetscFunctionReturn(0); 263 } 264 265 266