xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e48d15ef255607eaac65c71635979f78eabeae7c)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
806873bf2SBarry Smith #include <petsc-private/kernels/blockinvert.h>
9b01c7715SBarry Smith 
1043516a2dSKris Buschelman 
1143516a2dSKris Buschelman #undef __FUNCT__
12b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13713ccfa9SJed Brown PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14b01c7715SBarry Smith {
15b01c7715SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
166849ba73SBarry Smith   PetscErrorCode ierr;
17de80f912SBarry Smith   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18de80f912SBarry Smith   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
1962bba022SBarry Smith   PetscReal      shift = 0.0;
20b01c7715SBarry Smith 
21b01c7715SBarry Smith   PetscFunctionBegin;
229797317bSBarry Smith   if (a->idiagvalid) {
239797317bSBarry Smith     if (values) *values = a->idiag;
249797317bSBarry Smith     PetscFunctionReturn(0);
259797317bSBarry Smith   }
26b01c7715SBarry Smith   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27b01c7715SBarry Smith   diag_offset = a->diag;
28b01c7715SBarry Smith   if (!a->idiag) {
29de80f912SBarry Smith     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30de80f912SBarry Smith     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31b01c7715SBarry Smith   }
32b01c7715SBarry Smith   diag  = a->idiag;
33de80f912SBarry Smith   mdiag = a->idiag+bs2*mbs;
34bbead8a2SBarry Smith   if (values) *values = a->idiag;
35b01c7715SBarry Smith   /* factor and invert each block */
36521d7252SBarry Smith   switch (bs) {
37ab040260SJed Brown   case 1:
38ab040260SJed Brown     for (i=0; i<mbs; i++) {
39ab040260SJed Brown       odiag    = v + 1*diag_offset[i];
40ab040260SJed Brown       diag[0]  = odiag[0];
41ab040260SJed Brown       mdiag[0] = odiag[0];
42d4a378daSJed Brown       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43ab040260SJed Brown       diag    += 1;
44ab040260SJed Brown       mdiag   += 1;
45ab040260SJed Brown     }
46ab040260SJed Brown     break;
47b01c7715SBarry Smith   case 2:
48b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
49b01c7715SBarry Smith       odiag    = v + 4*diag_offset[i];
50b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51b01c7715SBarry Smith       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
5296b95a6bSBarry Smith       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53b01c7715SBarry Smith       diag    += 4;
54b01c7715SBarry Smith       mdiag   += 4;
55b01c7715SBarry Smith     }
56b01c7715SBarry Smith     break;
57b01c7715SBarry Smith   case 3:
58b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
59b01c7715SBarry Smith       odiag    = v + 9*diag_offset[i];
60b01c7715SBarry Smith       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61b01c7715SBarry Smith       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62b01c7715SBarry Smith       diag[8]  = odiag[8];
63b01c7715SBarry Smith       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64b01c7715SBarry Smith       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65b01c7715SBarry Smith       mdiag[8] = odiag[8];
6696b95a6bSBarry Smith       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67b01c7715SBarry Smith       diag    += 9;
68b01c7715SBarry Smith       mdiag   += 9;
69b01c7715SBarry Smith     }
70b01c7715SBarry Smith     break;
71b01c7715SBarry Smith   case 4:
72b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
73b01c7715SBarry Smith       odiag  = v + 16*diag_offset[i];
74b01c7715SBarry Smith       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75b01c7715SBarry Smith       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
7696b95a6bSBarry Smith       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77b01c7715SBarry Smith       diag  += 16;
78b01c7715SBarry Smith       mdiag += 16;
79b01c7715SBarry Smith     }
80b01c7715SBarry Smith     break;
81b01c7715SBarry Smith   case 5:
82b01c7715SBarry Smith     for (i=0; i<mbs; i++) {
83b01c7715SBarry Smith       odiag  = v + 25*diag_offset[i];
84b01c7715SBarry Smith       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85b01c7715SBarry Smith       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
8696b95a6bSBarry Smith       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87b01c7715SBarry Smith       diag  += 25;
88b01c7715SBarry Smith       mdiag += 25;
89b01c7715SBarry Smith     }
90b01c7715SBarry Smith     break;
91d49b2adcSBarry Smith   case 6:
92d49b2adcSBarry Smith     for (i=0; i<mbs; i++) {
93d49b2adcSBarry Smith       odiag  = v + 36*diag_offset[i];
94d49b2adcSBarry Smith       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95d49b2adcSBarry Smith       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
9696b95a6bSBarry Smith       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97d49b2adcSBarry Smith       diag  += 36;
98d49b2adcSBarry Smith       mdiag += 36;
99d49b2adcSBarry Smith     }
100d49b2adcSBarry Smith     break;
101de80f912SBarry Smith   case 7:
102de80f912SBarry Smith     for (i=0; i<mbs; i++) {
103de80f912SBarry Smith       odiag  = v + 49*diag_offset[i];
104de80f912SBarry Smith       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105de80f912SBarry Smith       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
10696b95a6bSBarry Smith       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107de80f912SBarry Smith       diag  += 49;
108de80f912SBarry Smith       mdiag += 49;
109de80f912SBarry Smith     }
110de80f912SBarry Smith     break;
111b01c7715SBarry Smith   default:
112de80f912SBarry Smith     ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113de80f912SBarry Smith     for (i=0; i<mbs; i++) {
114de80f912SBarry Smith       odiag  = v + bs2*diag_offset[i];
115de80f912SBarry Smith       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116de80f912SBarry Smith       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
11796b95a6bSBarry Smith       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118de80f912SBarry Smith       diag  += bs2;
119de80f912SBarry Smith       mdiag += bs2;
120de80f912SBarry Smith     }
121de80f912SBarry Smith     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122b01c7715SBarry Smith   }
123b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
124b01c7715SBarry Smith   PetscFunctionReturn(0);
125b01c7715SBarry Smith }
126b01c7715SBarry Smith 
/*
   Unrolled dense kernels  v <op> A*w  for a single n x n block, n = 1..7.

   A is read as a column-major n x n array (entry (r,c) is A[r + n*c]); v and w are
   arrays of length n.  op is the assignment operator token applied to each entry of v
   (=, += or -=).

   Every pointer argument is parenthesized in the expansion, so pointer expressions such
   as  base + offset  can be passed directly.  v must not alias w: entries of v are
   written while later rows still read w.
*/
#define PetscKernel_v_gets_A_times_w_1_exp(v,A,w,op) \
do {                                                 \
  (v)[0] op (A)[0]*(w)[0];                           \
} while (0)

#define PetscKernel_v_gets_A_times_w_2_exp(v,A,w,op) \
do {                                                 \
  (v)[0] op (A)[0]*(w)[0] + (A)[2]*(w)[1];           \
  (v)[1] op (A)[1]*(w)[0] + (A)[3]*(w)[1];           \
} while (0)

#define PetscKernel_v_gets_A_times_w_3_exp(v,A,w,op)         \
do {                                                         \
  (v)[0] op (A)[0]*(w)[0] + (A)[3]*(w)[1] + (A)[6]*(w)[2];   \
  (v)[1] op (A)[1]*(w)[0] + (A)[4]*(w)[1] + (A)[7]*(w)[2];   \
  (v)[2] op (A)[2]*(w)[0] + (A)[5]*(w)[1] + (A)[8]*(w)[2];   \
} while (0)

#define PetscKernel_v_gets_A_times_w_4_exp(v,A,w,op)                           \
do {                                                                           \
  (v)[0] op (A)[0]*(w)[0] + (A)[4]*(w)[1] + (A)[8] *(w)[2] + (A)[12]*(w)[3];   \
  (v)[1] op (A)[1]*(w)[0] + (A)[5]*(w)[1] + (A)[9] *(w)[2] + (A)[13]*(w)[3];   \
  (v)[2] op (A)[2]*(w)[0] + (A)[6]*(w)[1] + (A)[10]*(w)[2] + (A)[14]*(w)[3];   \
  (v)[3] op (A)[3]*(w)[0] + (A)[7]*(w)[1] + (A)[11]*(w)[2] + (A)[15]*(w)[3];   \
} while (0)

#define PetscKernel_v_gets_A_times_w_5_exp(v,A,w,op)                                             \
do {                                                                                             \
  (v)[0] op (A)[0]*(w)[0] + (A)[5]*(w)[1] + (A)[10]*(w)[2] + (A)[15]*(w)[3] + (A)[20]*(w)[4];    \
  (v)[1] op (A)[1]*(w)[0] + (A)[6]*(w)[1] + (A)[11]*(w)[2] + (A)[16]*(w)[3] + (A)[21]*(w)[4];    \
  (v)[2] op (A)[2]*(w)[0] + (A)[7]*(w)[1] + (A)[12]*(w)[2] + (A)[17]*(w)[3] + (A)[22]*(w)[4];    \
  (v)[3] op (A)[3]*(w)[0] + (A)[8]*(w)[1] + (A)[13]*(w)[2] + (A)[18]*(w)[3] + (A)[23]*(w)[4];    \
  (v)[4] op (A)[4]*(w)[0] + (A)[9]*(w)[1] + (A)[14]*(w)[2] + (A)[19]*(w)[3] + (A)[24]*(w)[4];    \
} while (0)

#define PetscKernel_v_gets_A_times_w_6_exp(v,A,w,op)                                                               \
do {                                                                                                               \
  (v)[0] op (A)[0]*(w)[0] + (A)[6] *(w)[1] + (A)[12]*(w)[2] + (A)[18]*(w)[3] + (A)[24]*(w)[4] + (A)[30]*(w)[5];    \
  (v)[1] op (A)[1]*(w)[0] + (A)[7] *(w)[1] + (A)[13]*(w)[2] + (A)[19]*(w)[3] + (A)[25]*(w)[4] + (A)[31]*(w)[5];    \
  (v)[2] op (A)[2]*(w)[0] + (A)[8] *(w)[1] + (A)[14]*(w)[2] + (A)[20]*(w)[3] + (A)[26]*(w)[4] + (A)[32]*(w)[5];    \
  (v)[3] op (A)[3]*(w)[0] + (A)[9] *(w)[1] + (A)[15]*(w)[2] + (A)[21]*(w)[3] + (A)[27]*(w)[4] + (A)[33]*(w)[5];    \
  (v)[4] op (A)[4]*(w)[0] + (A)[10]*(w)[1] + (A)[16]*(w)[2] + (A)[22]*(w)[3] + (A)[28]*(w)[4] + (A)[34]*(w)[5];    \
  (v)[5] op (A)[5]*(w)[0] + (A)[11]*(w)[1] + (A)[17]*(w)[2] + (A)[23]*(w)[3] + (A)[29]*(w)[4] + (A)[35]*(w)[5];    \
} while (0)

#define PetscKernel_v_gets_A_times_w_7_exp(v,A,w,op)                                                                                \
do {                                                                                                                                \
  (v)[0] op (A)[0]*(w)[0] + (A)[7] *(w)[1] + (A)[14]*(w)[2] + (A)[21]*(w)[3] + (A)[28]*(w)[4] + (A)[35]*(w)[5] + (A)[42]*(w)[6];    \
  (v)[1] op (A)[1]*(w)[0] + (A)[8] *(w)[1] + (A)[15]*(w)[2] + (A)[22]*(w)[3] + (A)[29]*(w)[4] + (A)[36]*(w)[5] + (A)[43]*(w)[6];    \
  (v)[2] op (A)[2]*(w)[0] + (A)[9] *(w)[1] + (A)[16]*(w)[2] + (A)[23]*(w)[3] + (A)[30]*(w)[4] + (A)[37]*(w)[5] + (A)[44]*(w)[6];    \
  (v)[3] op (A)[3]*(w)[0] + (A)[10]*(w)[1] + (A)[17]*(w)[2] + (A)[24]*(w)[3] + (A)[31]*(w)[4] + (A)[38]*(w)[5] + (A)[45]*(w)[6];    \
  (v)[4] op (A)[4]*(w)[0] + (A)[11]*(w)[1] + (A)[18]*(w)[2] + (A)[25]*(w)[3] + (A)[32]*(w)[4] + (A)[39]*(w)[5] + (A)[46]*(w)[6];    \
  (v)[5] op (A)[5]*(w)[0] + (A)[12]*(w)[1] + (A)[19]*(w)[2] + (A)[26]*(w)[3] + (A)[33]*(w)[4] + (A)[40]*(w)[5] + (A)[47]*(w)[6];    \
  (v)[6] op (A)[6]*(w)[0] + (A)[13]*(w)[1] + (A)[20]*(w)[2] + (A)[27]*(w)[3] + (A)[34]*(w)[4] + (A)[41]*(w)[5] + (A)[48]*(w)[6];    \
} while (0)

/* v = A*w */
#define PetscKernel_v_gets_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,=)
#define PetscKernel_v_gets_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,=)
#define PetscKernel_v_gets_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,=)
#define PetscKernel_v_gets_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,=)
#define PetscKernel_v_gets_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,=)
#define PetscKernel_v_gets_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,=)
#define PetscKernel_v_gets_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,=)
/* v += A*w */
#define PetscKernel_v_gets_v_plus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,+=)
#define PetscKernel_v_gets_v_plus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,+=)
#define PetscKernel_v_gets_v_plus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,+=)
#define PetscKernel_v_gets_v_plus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,+=)
#define PetscKernel_v_gets_v_plus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,+=)
#define PetscKernel_v_gets_v_plus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,+=)
#define PetscKernel_v_gets_v_plus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,+=)
/* v -= A*w */
#define PetscKernel_v_gets_v_minus_A_times_w_1(v,A,w) PetscKernel_v_gets_A_times_w_1_exp(v,A,w,-=)
#define PetscKernel_v_gets_v_minus_A_times_w_2(v,A,w) PetscKernel_v_gets_A_times_w_2_exp(v,A,w,-=)
#define PetscKernel_v_gets_v_minus_A_times_w_3(v,A,w) PetscKernel_v_gets_A_times_w_3_exp(v,A,w,-=)
#define PetscKernel_v_gets_v_minus_A_times_w_4(v,A,w) PetscKernel_v_gets_A_times_w_4_exp(v,A,w,-=)
#define PetscKernel_v_gets_v_minus_A_times_w_5(v,A,w) PetscKernel_v_gets_A_times_w_5_exp(v,A,w,-=)
#define PetscKernel_v_gets_v_minus_A_times_w_6(v,A,w) PetscKernel_v_gets_A_times_w_6_exp(v,A,w,-=)
#define PetscKernel_v_gets_v_minus_A_times_w_7(v,A,w) PetscKernel_v_gets_A_times_w_7_exp(v,A,w,-=)
2126d3beeddSMatthew Knepley 
2136d3beeddSMatthew Knepley #undef __FUNCT__
214*e48d15efSToby Isaac #define __FUNCT__ "MatSOR_SeqBAIJ"
215*e48d15efSToby Isaac PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
216b01c7715SBarry Smith {
217b01c7715SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
218*e48d15efSToby Isaac   PetscScalar       *x,*work,*w,*workt,*t;
219*e48d15efSToby Isaac   const MatScalar   *v,*aa = a->a, *idiag;
220*e48d15efSToby Isaac   const PetscScalar *b,*xb;
221*e48d15efSToby Isaac   PetscScalar       s[7], xw[7];
222dfbe8321SBarry Smith   PetscErrorCode    ierr;
223*e48d15efSToby Isaac   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
224c1ac3661SBarry Smith   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
225b01c7715SBarry Smith 
226b01c7715SBarry Smith   PetscFunctionBegin;
227b01c7715SBarry Smith   its = its*lits;
228e32f2f54SBarry Smith   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
229e32f2f54SBarry Smith   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230e32f2f54SBarry Smith   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231e32f2f54SBarry Smith   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232e32f2f54SBarry Smith   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233b01c7715SBarry Smith 
2340298fd71SBarry Smith   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
235b01c7715SBarry Smith 
236b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
237b01c7715SBarry Smith   diag  = a->diag;
238b01c7715SBarry Smith   idiag = a->idiag;
239de80f912SBarry Smith   k    = PetscMax(A->rmap->n,A->cmap->n);
240*e48d15efSToby Isaac   if (!a->mult_work) {
241*e48d15efSToby Isaac     ierr = PetscMalloc((2*k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
242de80f912SBarry Smith   }
243de80f912SBarry Smith   work = a->mult_work;
244*e48d15efSToby Isaac   t = work + k+1;
245de80f912SBarry Smith   if (!a->sor_work) {
246de80f912SBarry Smith     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
247de80f912SBarry Smith   }
248de80f912SBarry Smith   w = a->sor_work;
249de80f912SBarry Smith 
250de80f912SBarry Smith   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
251de80f912SBarry Smith   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
252de80f912SBarry Smith 
253de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
254de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
255*e48d15efSToby Isaac       switch (bs) {
256*e48d15efSToby Isaac       case 1:
257*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
258*e48d15efSToby Isaac         t[0] = b[0];
259*e48d15efSToby Isaac         i2     = 1;
260*e48d15efSToby Isaac         idiag += 1;
261*e48d15efSToby Isaac         for (i=1; i<m; i++) {
262*e48d15efSToby Isaac           v  = aa + ai[i];
263*e48d15efSToby Isaac           vi = aj + ai[i];
264*e48d15efSToby Isaac           nz = diag[i] - ai[i];
265*e48d15efSToby Isaac           s[0] = b[i2];
266*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
267*e48d15efSToby Isaac             xw[0] = x[vi[j]];
268*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
269*e48d15efSToby Isaac           }
270*e48d15efSToby Isaac           t[i2] = s[0];
271*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
272*e48d15efSToby Isaac           x[i2]  = xw[0];
273*e48d15efSToby Isaac           idiag += 1;
274*e48d15efSToby Isaac           i2    += 1;
275*e48d15efSToby Isaac         }
276*e48d15efSToby Isaac         break;
277*e48d15efSToby Isaac       case 2:
278*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
279*e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1];
280*e48d15efSToby Isaac         i2     = 2;
281*e48d15efSToby Isaac         idiag += 4;
282*e48d15efSToby Isaac         for (i=1; i<m; i++) {
283*e48d15efSToby Isaac           v  = aa + 4*ai[i];
284*e48d15efSToby Isaac           vi = aj + ai[i];
285*e48d15efSToby Isaac           nz = diag[i] - ai[i];
286*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
287*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
288*e48d15efSToby Isaac             idx = 2*vi[j];
289*e48d15efSToby Isaac             it  = 4*j;
290*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
291*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
292*e48d15efSToby Isaac           }
293*e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1];
294*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
295*e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
296*e48d15efSToby Isaac           idiag  += 4;
297*e48d15efSToby Isaac           i2     += 2;
298*e48d15efSToby Isaac         }
299*e48d15efSToby Isaac         break;
300*e48d15efSToby Isaac       case 3:
301*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
302*e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
303*e48d15efSToby Isaac         i2     = 3;
304*e48d15efSToby Isaac         idiag += 9;
305*e48d15efSToby Isaac         for (i=1; i<m; i++) {
306*e48d15efSToby Isaac           v  = aa + 9*ai[i];
307*e48d15efSToby Isaac           vi = aj + ai[i];
308*e48d15efSToby Isaac           nz = diag[i] - ai[i];
309*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
310*e48d15efSToby Isaac           while (nz--) {
311*e48d15efSToby Isaac             idx = 3*(*vi++);
312*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
313*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
314*e48d15efSToby Isaac             v  += 9;
315*e48d15efSToby Isaac           }
316*e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
317*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
318*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
319*e48d15efSToby Isaac           idiag  += 9;
320*e48d15efSToby Isaac           i2     += 3;
321*e48d15efSToby Isaac         }
322*e48d15efSToby Isaac         break;
323*e48d15efSToby Isaac       case 4:
324*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
325*e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
326*e48d15efSToby Isaac         i2     = 4;
327*e48d15efSToby Isaac         idiag += 16;
328*e48d15efSToby Isaac         for (i=1; i<m; i++) {
329*e48d15efSToby Isaac           v  = aa + 16*ai[i];
330*e48d15efSToby Isaac           vi = aj + ai[i];
331*e48d15efSToby Isaac           nz = diag[i] - ai[i];
332*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
333*e48d15efSToby Isaac           while (nz--) {
334*e48d15efSToby Isaac             idx = 4*(*vi++);
335*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
336*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
337*e48d15efSToby Isaac             v  += 16;
338*e48d15efSToby Isaac           }
339*e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
340*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
341*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
342*e48d15efSToby Isaac           idiag  += 16;
343*e48d15efSToby Isaac           i2     += 4;
344*e48d15efSToby Isaac         }
345*e48d15efSToby Isaac         break;
346*e48d15efSToby Isaac       case 5:
347*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
348*e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
349*e48d15efSToby Isaac         i2     = 5;
350*e48d15efSToby Isaac         idiag += 25;
351*e48d15efSToby Isaac         for (i=1; i<m; i++) {
352*e48d15efSToby Isaac           v  = aa + 25*ai[i];
353*e48d15efSToby Isaac           vi = aj + ai[i];
354*e48d15efSToby Isaac           nz = diag[i] - ai[i];
355*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
356*e48d15efSToby Isaac           while (nz--) {
357*e48d15efSToby Isaac             idx = 5*(*vi++);
358*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
359*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
360*e48d15efSToby Isaac             v  += 25;
361*e48d15efSToby Isaac           }
362*e48d15efSToby Isaac           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
363*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
364*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
365*e48d15efSToby Isaac           idiag  += 25;
366*e48d15efSToby Isaac           i2     += 5;
367*e48d15efSToby Isaac         }
368*e48d15efSToby Isaac         break;
369*e48d15efSToby Isaac       case 6:
370*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
371*e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
372*e48d15efSToby Isaac         i2     = 6;
373*e48d15efSToby Isaac         idiag += 36;
374*e48d15efSToby Isaac         for (i=1; i<m; i++) {
375*e48d15efSToby Isaac           v  = aa + 36*ai[i];
376*e48d15efSToby Isaac           vi = aj + ai[i];
377*e48d15efSToby Isaac           nz = diag[i] - ai[i];
378*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
379*e48d15efSToby Isaac           while (nz--) {
380*e48d15efSToby Isaac             idx = 6*(*vi++);
381*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
382*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
383*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
384*e48d15efSToby Isaac             v  += 36;
385*e48d15efSToby Isaac           }
386*e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
387*e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
388*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
389*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
390*e48d15efSToby Isaac           idiag  += 36;
391*e48d15efSToby Isaac           i2     += 6;
392*e48d15efSToby Isaac         }
393*e48d15efSToby Isaac         break;
394*e48d15efSToby Isaac       case 7:
395*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
396*e48d15efSToby Isaac         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
397*e48d15efSToby Isaac         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
398*e48d15efSToby Isaac         i2     = 7;
399*e48d15efSToby Isaac         idiag += 49;
400*e48d15efSToby Isaac         for (i=1; i<m; i++) {
401*e48d15efSToby Isaac           v  = aa + 49*ai[i];
402*e48d15efSToby Isaac           vi = aj + ai[i];
403*e48d15efSToby Isaac           nz = diag[i] - ai[i];
404*e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
405*e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
406*e48d15efSToby Isaac           while (nz--) {
407*e48d15efSToby Isaac             idx = 7*(*vi++);
408*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
409*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
410*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
411*e48d15efSToby Isaac             v  += 49;
412*e48d15efSToby Isaac           }
413*e48d15efSToby Isaac           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
414*e48d15efSToby Isaac           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
415*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
416*e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
417*e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
418*e48d15efSToby Isaac           idiag  += 49;
419*e48d15efSToby Isaac           i2     += 7;
420*e48d15efSToby Isaac         }
421*e48d15efSToby Isaac         break;
422*e48d15efSToby Isaac       default:
42396b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
424*e48d15efSToby Isaac         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
425de80f912SBarry Smith         i2     = bs;
426de80f912SBarry Smith         idiag += bs2;
427de80f912SBarry Smith         for (i=1; i<m; i++) {
428de80f912SBarry Smith           v  = aa + bs2*ai[i];
429de80f912SBarry Smith           vi = aj + ai[i];
430de80f912SBarry Smith           nz = diag[i] - ai[i];
431de80f912SBarry Smith 
432de80f912SBarry Smith           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
433de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
434de80f912SBarry Smith           workt = work;
435de80f912SBarry Smith           for (j=0; j<nz; j++) {
436de80f912SBarry Smith             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
437de80f912SBarry Smith             workt += bs;
438de80f912SBarry Smith           }
43996b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
440*e48d15efSToby Isaac           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
44196b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
442de80f912SBarry Smith 
443de80f912SBarry Smith           idiag += bs2;
444de80f912SBarry Smith           i2    += bs;
445de80f912SBarry Smith         }
446*e48d15efSToby Isaac         break;
447*e48d15efSToby Isaac       }
448de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
449*e48d15efSToby Isaac       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
450*e48d15efSToby Isaac       xb = t;
451de80f912SBarry Smith     }
452*e48d15efSToby Isaac     else xb = b;
453de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
454*e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
455*e48d15efSToby Isaac       i2 = bs * (m-1);
456*e48d15efSToby Isaac       switch (bs) {
457*e48d15efSToby Isaac       case 1:
458*e48d15efSToby Isaac         s[0]  = xb[i2];
459*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
460*e48d15efSToby Isaac         x[i2] = xw[0];
461*e48d15efSToby Isaac         i2   -= 1;
462*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
463*e48d15efSToby Isaac           v  = aa + (diag[i]+1);
464*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
465*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
466*e48d15efSToby Isaac           s[0] = xb[i2];
467*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
468*e48d15efSToby Isaac             xw[0] = x[vi[j]];
469*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
470*e48d15efSToby Isaac           }
471*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
472*e48d15efSToby Isaac           x[i2]  = xw[0];
473*e48d15efSToby Isaac           idiag -= 1;
474*e48d15efSToby Isaac           i2    -= 1;
475*e48d15efSToby Isaac         }
476*e48d15efSToby Isaac         break;
477*e48d15efSToby Isaac       case 2:
478*e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1];
479*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
480*e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1];
481*e48d15efSToby Isaac         i2    -= 2;
482*e48d15efSToby Isaac         idiag -= 4;
483*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
484*e48d15efSToby Isaac           v  = aa + 4*(diag[i] + 1);
485*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
486*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
487*e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1];
488*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
489*e48d15efSToby Isaac             idx = 2*vi[j];
490*e48d15efSToby Isaac             it  = 4*j;
491*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
492*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
493*e48d15efSToby Isaac           }
494*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
495*e48d15efSToby Isaac           x[i2]   = xw[0]; x[i2+1] = xw[1];
496*e48d15efSToby Isaac           idiag  -= 4;
497*e48d15efSToby Isaac           i2     -= 2;
498*e48d15efSToby Isaac         }
499*e48d15efSToby Isaac         break;
500*e48d15efSToby Isaac       case 3:
501*e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
502*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
503*e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
504*e48d15efSToby Isaac         i2    -= 3;
505*e48d15efSToby Isaac         idiag -= 9;
506*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
507*e48d15efSToby Isaac           v  = aa + 9*(diag[i]+1);
508*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
509*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
510*e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
511*e48d15efSToby Isaac           while (nz--) {
512*e48d15efSToby Isaac             idx = 3*(*vi++);
513*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
514*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
515*e48d15efSToby Isaac             v  += 9;
516*e48d15efSToby Isaac           }
517*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
518*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
519*e48d15efSToby Isaac           idiag  -= 9;
520*e48d15efSToby Isaac           i2     -= 3;
521*e48d15efSToby Isaac         }
522*e48d15efSToby Isaac         break;
523*e48d15efSToby Isaac       case 4:
524*e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
525*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
526*e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
527*e48d15efSToby Isaac         i2    -= 4;
528*e48d15efSToby Isaac         idiag -= 16;
529*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
530*e48d15efSToby Isaac           v  = aa + 16*(diag[i]+1);
531*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
532*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
533*e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
534*e48d15efSToby Isaac           while (nz--) {
535*e48d15efSToby Isaac             idx = 4*(*vi++);
536*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
537*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
538*e48d15efSToby Isaac             v  += 16;
539*e48d15efSToby Isaac           }
540*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
541*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
542*e48d15efSToby Isaac           idiag  -= 16;
543*e48d15efSToby Isaac           i2     -= 4;
544*e48d15efSToby Isaac         }
545*e48d15efSToby Isaac         break;
546*e48d15efSToby Isaac       case 5:
547*e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
548*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
549*e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
550*e48d15efSToby Isaac         i2    -= 5;
551*e48d15efSToby Isaac         idiag -= 25;
552*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
553*e48d15efSToby Isaac           v  = aa + 25*(diag[i]+1);
554*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
555*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
556*e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
557*e48d15efSToby Isaac           while (nz--) {
558*e48d15efSToby Isaac             idx = 5*(*vi++);
559*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
560*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
561*e48d15efSToby Isaac             v  += 25;
562*e48d15efSToby Isaac           }
563*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
564*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
565*e48d15efSToby Isaac           idiag  -= 25;
566*e48d15efSToby Isaac           i2     -= 5;
567*e48d15efSToby Isaac         }
568*e48d15efSToby Isaac         break;
569*e48d15efSToby Isaac       case 6:
570*e48d15efSToby Isaac         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
571*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
572*e48d15efSToby Isaac         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
573*e48d15efSToby Isaac         i2    -= 6;
574*e48d15efSToby Isaac         idiag -= 36;
575*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
576*e48d15efSToby Isaac           v  = aa + 36*(diag[i]+1);
577*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
578*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
579*e48d15efSToby Isaac           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
580*e48d15efSToby Isaac           while (nz--) {
581*e48d15efSToby Isaac             idx = 6*(*vi++);
582*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
583*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
584*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
585*e48d15efSToby Isaac             v  += 36;
586*e48d15efSToby Isaac           }
587*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
588*e48d15efSToby Isaac           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
589*e48d15efSToby Isaac           idiag  -= 36;
590*e48d15efSToby Isaac           i2     -= 6;
591*e48d15efSToby Isaac         }
592*e48d15efSToby Isaac         break;
593*e48d15efSToby Isaac       case 7:
594*e48d15efSToby Isaac         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
595*e48d15efSToby Isaac         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596*e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
597*e48d15efSToby Isaac         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
598*e48d15efSToby Isaac         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
599*e48d15efSToby Isaac         i2    -= 7;
600*e48d15efSToby Isaac         idiag -= 49;
601*e48d15efSToby Isaac         for (i=m-2; i>=0; i--) {
602*e48d15efSToby Isaac           v  = aa + 49*(diag[i]+1);
603*e48d15efSToby Isaac           vi = aj + diag[i] + 1;
604*e48d15efSToby Isaac           nz = ai[i+1] - diag[i] - 1;
605*e48d15efSToby Isaac           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
606*e48d15efSToby Isaac           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
607*e48d15efSToby Isaac           while (nz--) {
608*e48d15efSToby Isaac             idx = 7*(*vi++);
609*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
610*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
611*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
612*e48d15efSToby Isaac             v  += 49;
613*e48d15efSToby Isaac           }
614*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
615*e48d15efSToby Isaac           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
616*e48d15efSToby Isaac           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
617*e48d15efSToby Isaac           idiag  -= 49;
618*e48d15efSToby Isaac           i2     -= 7;
619*e48d15efSToby Isaac         }
620*e48d15efSToby Isaac         break;
621*e48d15efSToby Isaac       default:
622*e48d15efSToby Isaac         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
62396b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
624de80f912SBarry Smith         i2    -= bs;
625*e48d15efSToby Isaac         idiag -= bs2;
626de80f912SBarry Smith         for (i=m-2; i>=0; i--) {
627de80f912SBarry Smith           v  = aa + bs2*(diag[i]+1);
628de80f912SBarry Smith           vi = aj + diag[i] + 1;
629de80f912SBarry Smith           nz = ai[i+1] - diag[i] - 1;
630de80f912SBarry Smith 
631*e48d15efSToby Isaac           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
632de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
633de80f912SBarry Smith           workt = work;
634de80f912SBarry Smith           for (j=0; j<nz; j++) {
635de80f912SBarry Smith             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
636de80f912SBarry Smith             workt += bs;
637de80f912SBarry Smith           }
63896b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
63996b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
640*e48d15efSToby Isaac 
641de80f912SBarry Smith           idiag -= bs2;
642de80f912SBarry Smith           i2    -= bs;
643de80f912SBarry Smith         }
644*e48d15efSToby Isaac         break;
645*e48d15efSToby Isaac       }
646de80f912SBarry Smith       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
647de80f912SBarry Smith     }
648*e48d15efSToby Isaac     its--;
649*e48d15efSToby Isaac   }
650*e48d15efSToby Isaac   while (its--) {
651*e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
652*e48d15efSToby Isaac       idiag = a->idiag;
653*e48d15efSToby Isaac       i2 = 0;
654*e48d15efSToby Isaac       switch (bs) {
655*e48d15efSToby Isaac       case 1:
656*e48d15efSToby Isaac         for (i=0; i<m; i++) {
657*e48d15efSToby Isaac           v  = aa + ai[i];
658*e48d15efSToby Isaac           vi = aj + ai[i];
659*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
660*e48d15efSToby Isaac           s[0] = b[i2];
661*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
662*e48d15efSToby Isaac             xw[0] = x[vi[j]];
663*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
664*e48d15efSToby Isaac           }
665*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
666*e48d15efSToby Isaac           x[i2] += xw[0];
667*e48d15efSToby Isaac           idiag += 1;
668*e48d15efSToby Isaac           i2    += 1;
669*e48d15efSToby Isaac         }
670*e48d15efSToby Isaac         break;
671*e48d15efSToby Isaac       case 2:
672*e48d15efSToby Isaac         for (i=0; i<m; i++) {
673*e48d15efSToby Isaac           v  = aa + 4*ai[i];
674*e48d15efSToby Isaac           vi = aj + ai[i];
675*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
676*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
677*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
678*e48d15efSToby Isaac             idx = 2*vi[j];
679*e48d15efSToby Isaac             it  = 4*j;
680*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
681*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
682*e48d15efSToby Isaac           }
683*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
684*e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
685*e48d15efSToby Isaac           idiag  += 4;
686*e48d15efSToby Isaac           i2     += 2;
687*e48d15efSToby Isaac         }
688*e48d15efSToby Isaac         break;
689*e48d15efSToby Isaac       case 3:
690*e48d15efSToby Isaac         for (i=0; i<m; i++) {
691*e48d15efSToby Isaac           v  = aa + 9*ai[i];
692*e48d15efSToby Isaac           vi = aj + ai[i];
693*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
694*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
695*e48d15efSToby Isaac           while (nz--) {
696*e48d15efSToby Isaac             idx = 3*(*vi++);
697*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
698*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
699*e48d15efSToby Isaac             v  += 9;
700*e48d15efSToby Isaac           }
701*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
702*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703*e48d15efSToby Isaac           idiag  += 9;
704*e48d15efSToby Isaac           i2     += 3;
705*e48d15efSToby Isaac         }
706*e48d15efSToby Isaac         break;
707*e48d15efSToby Isaac       case 4:
708*e48d15efSToby Isaac         for (i=0; i<m; i++) {
709*e48d15efSToby Isaac           v  = aa + 16*ai[i];
710*e48d15efSToby Isaac           vi = aj + ai[i];
711*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
712*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
713*e48d15efSToby Isaac           while (nz--) {
714*e48d15efSToby Isaac             idx = 4*(*vi++);
715*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
716*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
717*e48d15efSToby Isaac             v  += 16;
718*e48d15efSToby Isaac           }
719*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
720*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
721*e48d15efSToby Isaac           idiag  += 16;
722*e48d15efSToby Isaac           i2     += 4;
723*e48d15efSToby Isaac         }
724*e48d15efSToby Isaac         break;
725*e48d15efSToby Isaac       case 5:
726*e48d15efSToby Isaac         for (i=0; i<m; i++) {
727*e48d15efSToby Isaac           v  = aa + 25*ai[i];
728*e48d15efSToby Isaac           vi = aj + ai[i];
729*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
730*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
731*e48d15efSToby Isaac           while (nz--) {
732*e48d15efSToby Isaac             idx = 5*(*vi++);
733*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
734*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
735*e48d15efSToby Isaac             v  += 25;
736*e48d15efSToby Isaac           }
737*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
738*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
739*e48d15efSToby Isaac           idiag  += 25;
740*e48d15efSToby Isaac           i2     += 5;
741*e48d15efSToby Isaac         }
742*e48d15efSToby Isaac         break;
743*e48d15efSToby Isaac       case 6:
744*e48d15efSToby Isaac         for (i=0; i<m; i++) {
745*e48d15efSToby Isaac           v  = aa + 36*ai[i];
746*e48d15efSToby Isaac           vi = aj + ai[i];
747*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
748*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
749*e48d15efSToby Isaac           while (nz--) {
750*e48d15efSToby Isaac             idx = 6*(*vi++);
751*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
752*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
753*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
754*e48d15efSToby Isaac             v  += 36;
755*e48d15efSToby Isaac           }
756*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
757*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
758*e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
759*e48d15efSToby Isaac           idiag  += 36;
760*e48d15efSToby Isaac           i2     += 6;
761*e48d15efSToby Isaac         }
762*e48d15efSToby Isaac         break;
763*e48d15efSToby Isaac       case 7:
764*e48d15efSToby Isaac         for (i=0; i<m; i++) {
765*e48d15efSToby Isaac           v  = aa + 49*ai[i];
766*e48d15efSToby Isaac           vi = aj + ai[i];
767*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
768*e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
769*e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
770*e48d15efSToby Isaac           while (nz--) {
771*e48d15efSToby Isaac             idx = 7*(*vi++);
772*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
773*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
774*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
775*e48d15efSToby Isaac             v  += 49;
776*e48d15efSToby Isaac           }
777*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
778*e48d15efSToby Isaac           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
779*e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
780*e48d15efSToby Isaac           idiag  += 49;
781*e48d15efSToby Isaac           i2     += 7;
782*e48d15efSToby Isaac         }
783*e48d15efSToby Isaac         break;
784*e48d15efSToby Isaac       default:
785*e48d15efSToby Isaac         for (i=0; i<m; i++) {
786*e48d15efSToby Isaac           v  = aa + bs2*ai[i];
787*e48d15efSToby Isaac           vi = aj + ai[i];
788*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
789*e48d15efSToby Isaac 
790*e48d15efSToby Isaac           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
791*e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
792*e48d15efSToby Isaac           workt = work;
793*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
794*e48d15efSToby Isaac             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
795*e48d15efSToby Isaac             workt += bs;
796*e48d15efSToby Isaac           }
797*e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
798*e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
799*e48d15efSToby Isaac 
800*e48d15efSToby Isaac           idiag += bs2;
801*e48d15efSToby Isaac           i2    += bs;
802*e48d15efSToby Isaac         }
803*e48d15efSToby Isaac         break;
804*e48d15efSToby Isaac       }
805*e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
806*e48d15efSToby Isaac     }
807*e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
808*e48d15efSToby Isaac       idiag = a->idiag+bs2*(a->mbs-1);
809*e48d15efSToby Isaac       i2 = bs * (m-1);
810*e48d15efSToby Isaac       switch (bs) {
811*e48d15efSToby Isaac       case 1:
812*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
813*e48d15efSToby Isaac           v  = aa + ai[i];
814*e48d15efSToby Isaac           vi = aj + ai[i];
815*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
816*e48d15efSToby Isaac           s[0] = b[i2];
817*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
818*e48d15efSToby Isaac             xw[0] = x[vi[j]];
819*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
820*e48d15efSToby Isaac           }
821*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
822*e48d15efSToby Isaac           x[i2] += xw[0];
823*e48d15efSToby Isaac           idiag -= 1;
824*e48d15efSToby Isaac           i2    -= 1;
825*e48d15efSToby Isaac         }
826*e48d15efSToby Isaac         break;
827*e48d15efSToby Isaac       case 2:
828*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
829*e48d15efSToby Isaac           v  = aa + 4*ai[i];
830*e48d15efSToby Isaac           vi = aj + ai[i];
831*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
832*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1];
833*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
834*e48d15efSToby Isaac             idx = 2*vi[j];
835*e48d15efSToby Isaac             it  = 4*j;
836*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx];
837*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
838*e48d15efSToby Isaac           }
839*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
840*e48d15efSToby Isaac           x[i2]  += xw[0]; x[i2+1] += xw[1];
841*e48d15efSToby Isaac           idiag  -= 4;
842*e48d15efSToby Isaac           i2     -= 2;
843*e48d15efSToby Isaac         }
844*e48d15efSToby Isaac         break;
845*e48d15efSToby Isaac       case 3:
846*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
847*e48d15efSToby Isaac           v  = aa + 9*ai[i];
848*e48d15efSToby Isaac           vi = aj + ai[i];
849*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
850*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
851*e48d15efSToby Isaac           while (nz--) {
852*e48d15efSToby Isaac             idx = 3*(*vi++);
853*e48d15efSToby Isaac             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
854*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
855*e48d15efSToby Isaac             v  += 9;
856*e48d15efSToby Isaac           }
857*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
858*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859*e48d15efSToby Isaac           idiag  -= 9;
860*e48d15efSToby Isaac           i2     -= 3;
861*e48d15efSToby Isaac         }
862*e48d15efSToby Isaac         break;
863*e48d15efSToby Isaac       case 4:
864*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
865*e48d15efSToby Isaac           v  = aa + 16*ai[i];
866*e48d15efSToby Isaac           vi = aj + ai[i];
867*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
868*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
869*e48d15efSToby Isaac           while (nz--) {
870*e48d15efSToby Isaac             idx = 4*(*vi++);
871*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
872*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
873*e48d15efSToby Isaac             v  += 16;
874*e48d15efSToby Isaac           }
875*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
876*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
877*e48d15efSToby Isaac           idiag  -= 16;
878*e48d15efSToby Isaac           i2     -= 4;
879*e48d15efSToby Isaac         }
880*e48d15efSToby Isaac         break;
881*e48d15efSToby Isaac       case 5:
882*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
883*e48d15efSToby Isaac           v  = aa + 25*ai[i];
884*e48d15efSToby Isaac           vi = aj + ai[i];
885*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
886*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
887*e48d15efSToby Isaac           while (nz--) {
888*e48d15efSToby Isaac             idx = 5*(*vi++);
889*e48d15efSToby Isaac             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
890*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
891*e48d15efSToby Isaac             v  += 25;
892*e48d15efSToby Isaac           }
893*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
894*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
895*e48d15efSToby Isaac           idiag  -= 25;
896*e48d15efSToby Isaac           i2     -= 5;
897*e48d15efSToby Isaac         }
898*e48d15efSToby Isaac         break;
899*e48d15efSToby Isaac       case 6:
900*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
901*e48d15efSToby Isaac           v  = aa + 36*ai[i];
902*e48d15efSToby Isaac           vi = aj + ai[i];
903*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
904*e48d15efSToby Isaac           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
905*e48d15efSToby Isaac           while (nz--) {
906*e48d15efSToby Isaac             idx = 6*(*vi++);
907*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
908*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
909*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
910*e48d15efSToby Isaac             v  += 36;
911*e48d15efSToby Isaac           }
912*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
913*e48d15efSToby Isaac           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
914*e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
915*e48d15efSToby Isaac           idiag  -= 36;
916*e48d15efSToby Isaac           i2     -= 6;
917*e48d15efSToby Isaac         }
918*e48d15efSToby Isaac         break;
919*e48d15efSToby Isaac       case 7:
920*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
921*e48d15efSToby Isaac           v  = aa + 49*ai[i];
922*e48d15efSToby Isaac           vi = aj + ai[i];
923*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
924*e48d15efSToby Isaac           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
925*e48d15efSToby Isaac           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
926*e48d15efSToby Isaac           while (nz--) {
927*e48d15efSToby Isaac             idx = 7*(*vi++);
928*e48d15efSToby Isaac             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
929*e48d15efSToby Isaac             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
930*e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
931*e48d15efSToby Isaac             v  += 49;
932*e48d15efSToby Isaac           }
933*e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
934*e48d15efSToby Isaac           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
935*e48d15efSToby Isaac           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
936*e48d15efSToby Isaac           idiag  -= 49;
937*e48d15efSToby Isaac           i2     -= 7;
938*e48d15efSToby Isaac         }
939*e48d15efSToby Isaac         break;
940*e48d15efSToby Isaac       default:
941*e48d15efSToby Isaac         for (i=m-1; i>=0; i--) {
942*e48d15efSToby Isaac           v  = aa + bs2*ai[i];
943*e48d15efSToby Isaac           vi = aj + ai[i];
944*e48d15efSToby Isaac           nz = ai[i+1] - ai[i];
945*e48d15efSToby Isaac 
946*e48d15efSToby Isaac           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
947*e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
948*e48d15efSToby Isaac           workt = work;
949*e48d15efSToby Isaac           for (j=0; j<nz; j++) {
950*e48d15efSToby Isaac             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
951*e48d15efSToby Isaac             workt += bs;
952*e48d15efSToby Isaac           }
953*e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
954*e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
955*e48d15efSToby Isaac 
956*e48d15efSToby Isaac           idiag -= bs2;
957*e48d15efSToby Isaac           i2    -= bs;
958*e48d15efSToby Isaac         }
959*e48d15efSToby Isaac         break;
960*e48d15efSToby Isaac       }
961*e48d15efSToby Isaac       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
962*e48d15efSToby Isaac     }
963*e48d15efSToby Isaac   }
964de80f912SBarry Smith   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965de80f912SBarry Smith   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
966de80f912SBarry Smith   PetscFunctionReturn(0);
967de80f912SBarry Smith }
968de80f912SBarry Smith 
969*e48d15efSToby Isaac 
970af674e45SBarry Smith /*
97181824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
972af674e45SBarry Smith */
973af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
974af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
975af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
976af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4
977af674e45SBarry Smith #endif
978af674e45SBarry Smith 
979af674e45SBarry Smith #undef __FUNCT__
980af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_"
9818cc058d9SJed Brown PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
982af674e45SBarry Smith {
983af674e45SBarry Smith   Mat               A  = *AA;
984af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
985c1ac3661SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
986c1ac3661SBarry Smith   PetscInt          *ai    =a->i,*ailen=a->ilen;
98717ec6a02SBarry Smith   PetscInt          *aj    =a->j,stepval,lastcol = -1;
988f15d580aSBarry Smith   const PetscScalar *value = v;
9894bb09213Spetsc   MatScalar         *ap,*aa = a->a,*bap;
990af674e45SBarry Smith 
991af674e45SBarry Smith   PetscFunctionBegin;
992ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
993af674e45SBarry Smith   stepval = (n-1)*4;
994af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
995af674e45SBarry Smith     row  = im[k];
996af674e45SBarry Smith     rp   = aj + ai[row];
997af674e45SBarry Smith     ap   = aa + 16*ai[row];
998af674e45SBarry Smith     nrow = ailen[row];
999af674e45SBarry Smith     low  = 0;
100017ec6a02SBarry Smith     high = nrow;
1001af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
1002af674e45SBarry Smith       col = in[l];
1003db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
1004db4deed7SKarl Rupp       else                high = nrow;
100517ec6a02SBarry Smith       lastcol = col;
10061e3347e8SBarry Smith       value   = v + k*(stepval+4 + l)*4;
1007af674e45SBarry Smith       while (high-low > 7) {
1008af674e45SBarry Smith         t = (low+high)/2;
1009af674e45SBarry Smith         if (rp[t] > col) high = t;
1010af674e45SBarry Smith         else             low  = t;
1011af674e45SBarry Smith       }
1012af674e45SBarry Smith       for (i=low; i<high; i++) {
1013af674e45SBarry Smith         if (rp[i] > col) break;
1014af674e45SBarry Smith         if (rp[i] == col) {
1015af674e45SBarry Smith           bap = ap +  16*i;
1016af674e45SBarry Smith           for (ii=0; ii<4; ii++,value+=stepval) {
1017af674e45SBarry Smith             for (jj=ii; jj<16; jj+=4) {
1018af674e45SBarry Smith               bap[jj] += *value++;
1019af674e45SBarry Smith             }
1020af674e45SBarry Smith           }
1021af674e45SBarry Smith           goto noinsert2;
1022af674e45SBarry Smith         }
1023af674e45SBarry Smith       }
1024af674e45SBarry Smith       N = nrow++ - 1;
102517ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1026af674e45SBarry Smith       /* shift up all the later entries in this row */
1027af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
1028af674e45SBarry Smith         rp[ii+1] = rp[ii];
1029a037b02bSBarry Smith         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1030af674e45SBarry Smith       }
1031af674e45SBarry Smith       if (N >= i) {
1032a037b02bSBarry Smith         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1033af674e45SBarry Smith       }
1034af674e45SBarry Smith       rp[i] = col;
1035af674e45SBarry Smith       bap   = ap +  16*i;
1036af674e45SBarry Smith       for (ii=0; ii<4; ii++,value+=stepval) {
1037af674e45SBarry Smith         for (jj=ii; jj<16; jj+=4) {
1038af674e45SBarry Smith           bap[jj] = *value++;
1039af674e45SBarry Smith         }
1040af674e45SBarry Smith       }
1041af674e45SBarry Smith       noinsert2:;
1042af674e45SBarry Smith       low = i;
1043af674e45SBarry Smith     }
1044af674e45SBarry Smith     ailen[row] = nrow;
1045af674e45SBarry Smith   }
1046be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1047af674e45SBarry Smith }
1048af674e45SBarry Smith 
1049af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1050af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4
1051af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1052af674e45SBarry Smith #define matsetvalues4_ matsetvalues4
1053af674e45SBarry Smith #endif
1054af674e45SBarry Smith 
1055af674e45SBarry Smith #undef __FUNCT__
1056af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_"
10578cc058d9SJed Brown PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1058af674e45SBarry Smith {
1059af674e45SBarry Smith   Mat         A  = *AA;
1060af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1061c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1062c1ac3661SBarry Smith   PetscInt    *ai=a->i,*ailen=a->ilen;
1063c1ac3661SBarry Smith   PetscInt    *aj=a->j,brow,bcol;
106417ec6a02SBarry Smith   PetscInt    ridx,cidx,lastcol = -1;
1065af674e45SBarry Smith   MatScalar   *ap,value,*aa=a->a,*bap;
1066af674e45SBarry Smith 
1067af674e45SBarry Smith   PetscFunctionBegin;
1068af674e45SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
1069af674e45SBarry Smith     row  = im[k]; brow = row/4;
1070af674e45SBarry Smith     rp   = aj + ai[brow];
1071af674e45SBarry Smith     ap   = aa + 16*ai[brow];
1072af674e45SBarry Smith     nrow = ailen[brow];
1073af674e45SBarry Smith     low  = 0;
107417ec6a02SBarry Smith     high = nrow;
1075af674e45SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
1076af674e45SBarry Smith       col   = in[l]; bcol = col/4;
1077af674e45SBarry Smith       ridx  = row % 4; cidx = col % 4;
1078af674e45SBarry Smith       value = v[l + k*n];
1079db4deed7SKarl Rupp       if (col <= lastcol)  low = 0;
1080db4deed7SKarl Rupp       else                high = nrow;
108117ec6a02SBarry Smith       lastcol = col;
1082af674e45SBarry Smith       while (high-low > 7) {
1083af674e45SBarry Smith         t = (low+high)/2;
1084af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1085af674e45SBarry Smith         else              low  = t;
1086af674e45SBarry Smith       }
1087af674e45SBarry Smith       for (i=low; i<high; i++) {
1088af674e45SBarry Smith         if (rp[i] > bcol) break;
1089af674e45SBarry Smith         if (rp[i] == bcol) {
1090af674e45SBarry Smith           bap   = ap +  16*i + 4*cidx + ridx;
1091af674e45SBarry Smith           *bap += value;
1092af674e45SBarry Smith           goto noinsert1;
1093af674e45SBarry Smith         }
1094af674e45SBarry Smith       }
1095af674e45SBarry Smith       N = nrow++ - 1;
109617ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1097af674e45SBarry Smith       /* shift up all the later entries in this row */
1098af674e45SBarry Smith       for (ii=N; ii>=i; ii--) {
1099af674e45SBarry Smith         rp[ii+1] = rp[ii];
1100a037b02bSBarry Smith         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1101af674e45SBarry Smith       }
1102af674e45SBarry Smith       if (N>=i) {
1103a037b02bSBarry Smith         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1104af674e45SBarry Smith       }
1105af674e45SBarry Smith       rp[i]                    = bcol;
1106af674e45SBarry Smith       ap[16*i + 4*cidx + ridx] = value;
1107af674e45SBarry Smith noinsert1:;
1108af674e45SBarry Smith       low = i;
1109af674e45SBarry Smith     }
1110af674e45SBarry Smith     ailen[brow] = nrow;
1111af674e45SBarry Smith   }
1112be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1113af674e45SBarry Smith }
1114af674e45SBarry Smith 
1115be5855fcSBarry Smith /*
1116be5855fcSBarry Smith      Checks for missing diagonals
1117be5855fcSBarry Smith */
11184a2ae208SSatish Balay #undef __FUNCT__
11194a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1120ace3abfcSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1121be5855fcSBarry Smith {
1122be5855fcSBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
11236849ba73SBarry Smith   PetscErrorCode ierr;
1124c1ac3661SBarry Smith   PetscInt       *diag,*jj = a->j,i;
1125be5855fcSBarry Smith 
1126be5855fcSBarry Smith   PetscFunctionBegin;
1127c4992f7dSBarry Smith   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
11282af78befSBarry Smith   *missing = PETSC_FALSE;
11292efa7f71SHong Zhang   if (A->rmap->n > 0 && !jj) {
11302efa7f71SHong Zhang     *missing = PETSC_TRUE;
11312efa7f71SHong Zhang     if (d) *d = 0;
1132358d2f5dSShri Abhyankar     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
11332efa7f71SHong Zhang   } else {
1134883fce79SBarry Smith     diag = a->diag;
11350e8e8aceSBarry Smith     for (i=0; i<a->mbs; i++) {
1136be5855fcSBarry Smith       if (jj[diag[i]] != i) {
11372af78befSBarry Smith         *missing = PETSC_TRUE;
11382af78befSBarry Smith         if (d) *d = i;
11392efa7f71SHong Zhang         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1140358d2f5dSShri Abhyankar         break;
11412efa7f71SHong Zhang       }
1142be5855fcSBarry Smith     }
1143be5855fcSBarry Smith   }
1144be5855fcSBarry Smith   PetscFunctionReturn(0);
1145be5855fcSBarry Smith }
1146be5855fcSBarry Smith 
11474a2ae208SSatish Balay #undef __FUNCT__
11484a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1149dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1150de6a44a3SBarry Smith {
1151de6a44a3SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
11526849ba73SBarry Smith   PetscErrorCode ierr;
115309f38230SBarry Smith   PetscInt       i,j,m = a->mbs;
1154de6a44a3SBarry Smith 
11553a40ed3dSBarry Smith   PetscFunctionBegin;
115609f38230SBarry Smith   if (!a->diag) {
115709f38230SBarry Smith     ierr         = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
11584fd072dbSBarry Smith     ierr         = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
11594fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
116009f38230SBarry Smith   }
11617fc0212eSBarry Smith   for (i=0; i<m; i++) {
116209f38230SBarry Smith     a->diag[i] = a->i[i+1];
1163de6a44a3SBarry Smith     for (j=a->i[i]; j<a->i[i+1]; j++) {
1164de6a44a3SBarry Smith       if (a->j[j] == i) {
116509f38230SBarry Smith         a->diag[i] = j;
1166de6a44a3SBarry Smith         break;
1167de6a44a3SBarry Smith       }
1168de6a44a3SBarry Smith     }
1169de6a44a3SBarry Smith   }
11703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1171de6a44a3SBarry Smith }
11722593348eSBarry Smith 
11732593348eSBarry Smith 
11744a2ae208SSatish Balay #undef __FUNCT__
11754a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
11761a83f524SJed Brown static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
11773b2fbd54SBarry Smith {
11783b2fbd54SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1179dfbe8321SBarry Smith   PetscErrorCode ierr;
11801a83f524SJed Brown   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
11811a83f524SJed Brown   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
11823b2fbd54SBarry Smith 
11833a40ed3dSBarry Smith   PetscFunctionBegin;
11843b2fbd54SBarry Smith   *nn = n;
11853a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
11863b2fbd54SBarry Smith   if (symmetric) {
11878f7157efSSatish Balay     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1188553b3c51SBarry Smith     nz   = tia[n];
11893b2fbd54SBarry Smith   } else {
11908f7157efSSatish Balay     tia = a->i; tja = a->j;
11913b2fbd54SBarry Smith   }
11923b2fbd54SBarry Smith 
1193ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1194ecc77c7aSBarry Smith     (*nn) *= bs;
11958f7157efSSatish Balay     /* malloc & create the natural set of indices */
1196ecc77c7aSBarry Smith     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
11979985e31cSBarry Smith     if (n) {
1198ecc77c7aSBarry Smith       (*ia)[0] = 0;
1199ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1200ecc77c7aSBarry Smith         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1201ecc77c7aSBarry Smith       }
12029985e31cSBarry Smith     }
1203ecc77c7aSBarry Smith 
1204ecc77c7aSBarry Smith     for (i=1; i<n; i++) {
1205ecc77c7aSBarry Smith       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1206ecc77c7aSBarry Smith       for (j=1; j<bs; j++) {
1207ecc77c7aSBarry Smith         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
12088f7157efSSatish Balay       }
12098f7157efSSatish Balay     }
12109985e31cSBarry Smith     if (n) {
1211ecc77c7aSBarry Smith       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
12129985e31cSBarry Smith     }
1213ecc77c7aSBarry Smith 
12141a83f524SJed Brown     if (inja) {
12159985e31cSBarry Smith       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
12169985e31cSBarry Smith       cnt = 0;
12179985e31cSBarry Smith       for (i=0; i<n; i++) {
12189985e31cSBarry Smith         for (j=0; j<bs; j++) {
12199985e31cSBarry Smith           for (k=tia[i]; k<tia[i+1]; k++) {
12209985e31cSBarry Smith             for (l=0; l<bs; l++) {
12219985e31cSBarry Smith               (*ja)[cnt++] = bs*tja[k] + l;
12229985e31cSBarry Smith             }
12239985e31cSBarry Smith           }
12249985e31cSBarry Smith         }
12259985e31cSBarry Smith       }
12269985e31cSBarry Smith     }
12279985e31cSBarry Smith 
12288f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
12298f7157efSSatish Balay       ierr = PetscFree(tia);CHKERRQ(ierr);
12308f7157efSSatish Balay       ierr = PetscFree(tja);CHKERRQ(ierr);
12318f7157efSSatish Balay     }
1232f6d58c54SBarry Smith   } else if (oshift == 1) {
1233715a17b5SBarry Smith     if (symmetric) {
1234a2ea699eSBarry Smith       nz = tia[A->rmap->n/bs];
1235715a17b5SBarry Smith       /*  add 1 to i and j indices */
1236715a17b5SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1237715a17b5SBarry Smith       *ia = tia;
1238715a17b5SBarry Smith       if (ja) {
1239715a17b5SBarry Smith         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1240715a17b5SBarry Smith         *ja = tja;
1241715a17b5SBarry Smith       }
1242715a17b5SBarry Smith     } else {
1243a2ea699eSBarry Smith       nz = a->i[A->rmap->n/bs];
1244f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
1245f6d58c54SBarry Smith       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1246f6d58c54SBarry Smith       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1247f6d58c54SBarry Smith       if (ja) {
1248f6d58c54SBarry Smith         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1249f6d58c54SBarry Smith         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1250f6d58c54SBarry Smith       }
1251715a17b5SBarry Smith     }
12528f7157efSSatish Balay   } else {
12538f7157efSSatish Balay     *ia = tia;
1254ecc77c7aSBarry Smith     if (ja) *ja = tja;
12558f7157efSSatish Balay   }
12563a40ed3dSBarry Smith   PetscFunctionReturn(0);
12573b2fbd54SBarry Smith }
12583b2fbd54SBarry Smith 
12594a2ae208SSatish Balay #undef __FUNCT__
12604a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
12611a83f524SJed Brown static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
12623b2fbd54SBarry Smith {
12636849ba73SBarry Smith   PetscErrorCode ierr;
12643b2fbd54SBarry Smith 
12653a40ed3dSBarry Smith   PetscFunctionBegin;
12663a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1267715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1268606d414cSSatish Balay     ierr = PetscFree(*ia);CHKERRQ(ierr);
12699985e31cSBarry Smith     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
12703b2fbd54SBarry Smith   }
12713a40ed3dSBarry Smith   PetscFunctionReturn(0);
12723b2fbd54SBarry Smith }
12733b2fbd54SBarry Smith 
12744a2ae208SSatish Balay #undef __FUNCT__
12754a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ"
1276dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
12772d61bbb3SSatish Balay {
12782d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1279dfbe8321SBarry Smith   PetscErrorCode ierr;
12802d61bbb3SSatish Balay 
1281433994e6SBarry Smith   PetscFunctionBegin;
1282aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1283d0f46423SBarry Smith   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
12842d61bbb3SSatish Balay #endif
1285e6b907acSBarry Smith   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
12866bf464f9SBarry Smith   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
12876bf464f9SBarry Smith   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
12884fd072dbSBarry Smith   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
128905b42c5fSBarry Smith   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
12904fd072dbSBarry Smith   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
129105b42c5fSBarry Smith   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
129205b42c5fSBarry Smith   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1293de80f912SBarry Smith   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
12946bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
129505b42c5fSBarry Smith   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
129605b42c5fSBarry Smith   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1297cd6b891eSBarry Smith   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1298c4319e64SHong Zhang 
12996bf464f9SBarry Smith   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
13006bf464f9SBarry Smith   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1301bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
1302901853e0SKris Buschelman 
1303dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
13040298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",NULL);CHKERRQ(ierr);
13050298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
13060298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
13070298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",NULL);CHKERRQ(ierr);
13080298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",NULL);CHKERRQ(ierr);
13090298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",NULL);CHKERRQ(ierr);
13100298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
13110298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
13120298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",NULL);CHKERRQ(ierr);
13130298fd71SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",NULL);CHKERRQ(ierr);
13142d61bbb3SSatish Balay   PetscFunctionReturn(0);
13152d61bbb3SSatish Balay }
13162d61bbb3SSatish Balay 
13174a2ae208SSatish Balay #undef __FUNCT__
13184a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ"
1319ace3abfcSBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
13202d61bbb3SSatish Balay {
13212d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
132263ba0a88SBarry Smith   PetscErrorCode ierr;
13232d61bbb3SSatish Balay 
13242d61bbb3SSatish Balay   PetscFunctionBegin;
1325aa275fccSKris Buschelman   switch (op) {
1326aa275fccSKris Buschelman   case MAT_ROW_ORIENTED:
13274e0d8c25SBarry Smith     a->roworiented = flg;
1328aa275fccSKris Buschelman     break;
1329a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1330a9817697SBarry Smith     a->keepnonzeropattern = flg;
1331aa275fccSKris Buschelman     break;
1332512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1333512a5fc5SBarry Smith     a->nonew = (flg ? 0 : 1);
1334aa275fccSKris Buschelman     break;
1335aa275fccSKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
13364e0d8c25SBarry Smith     a->nonew = (flg ? -1 : 0);
1337aa275fccSKris Buschelman     break;
1338aa275fccSKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
13394e0d8c25SBarry Smith     a->nonew = (flg ? -2 : 0);
1340aa275fccSKris Buschelman     break;
134128b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
134228b2fa4aSMatthew Knepley     a->nounused = (flg ? -1 : 0);
134328b2fa4aSMatthew Knepley     break;
1344cd6b891eSBarry Smith   case MAT_CHECK_COMPRESSED_ROW:
1345cd6b891eSBarry Smith     a->compressedrow.check = flg;
1346cd6b891eSBarry Smith     break;
13474e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1348aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1349aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1350290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1351aa275fccSKris Buschelman     break;
13525021d80fSJed Brown   case MAT_SPD:
135377e54ba9SKris Buschelman   case MAT_SYMMETRIC:
135477e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
13559a4540c5SBarry Smith   case MAT_HERMITIAN:
13569a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
13575021d80fSJed Brown     /* These options are handled directly by MatSetOption() */
135877e54ba9SKris Buschelman     break;
1359aa275fccSKris Buschelman   default:
1360e32f2f54SBarry Smith     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
13612d61bbb3SSatish Balay   }
13622d61bbb3SSatish Balay   PetscFunctionReturn(0);
13632d61bbb3SSatish Balay }
13642d61bbb3SSatish Balay 
13654a2ae208SSatish Balay #undef __FUNCT__
13664a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ"
1367c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
13682d61bbb3SSatish Balay {
13692d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
13706849ba73SBarry Smith   PetscErrorCode ierr;
1371c1ac3661SBarry Smith   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
13723f1db9ecSBarry Smith   MatScalar      *aa,*aa_i;
137387828ca2SBarry Smith   PetscScalar    *v_i;
13742d61bbb3SSatish Balay 
13752d61bbb3SSatish Balay   PetscFunctionBegin;
1376d0f46423SBarry Smith   bs  = A->rmap->bs;
13772d61bbb3SSatish Balay   ai  = a->i;
13782d61bbb3SSatish Balay   aj  = a->j;
13792d61bbb3SSatish Balay   aa  = a->a;
13802d61bbb3SSatish Balay   bs2 = a->bs2;
13812d61bbb3SSatish Balay 
1382e32f2f54SBarry Smith   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
13832d61bbb3SSatish Balay 
13842d61bbb3SSatish Balay   bn  = row/bs;   /* Block number */
13852d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
13862d61bbb3SSatish Balay   M   = ai[bn+1] - ai[bn];
13872d61bbb3SSatish Balay   *nz = bs*M;
13882d61bbb3SSatish Balay 
13892d61bbb3SSatish Balay   if (v) {
13902d61bbb3SSatish Balay     *v = 0;
13912d61bbb3SSatish Balay     if (*nz) {
139287828ca2SBarry Smith       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
13932d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
13942d61bbb3SSatish Balay         v_i  = *v + i*bs;
13952d61bbb3SSatish Balay         aa_i = aa + bs2*(ai[bn] + i);
139626fbe8dcSKarl Rupp         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
13972d61bbb3SSatish Balay       }
13982d61bbb3SSatish Balay     }
13992d61bbb3SSatish Balay   }
14002d61bbb3SSatish Balay 
14012d61bbb3SSatish Balay   if (idx) {
14022d61bbb3SSatish Balay     *idx = 0;
14032d61bbb3SSatish Balay     if (*nz) {
1404c1ac3661SBarry Smith       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
14052d61bbb3SSatish Balay       for (i=0; i<M; i++) { /* for each block in the block row */
14062d61bbb3SSatish Balay         idx_i = *idx + i*bs;
14072d61bbb3SSatish Balay         itmp  = bs*aj[ai[bn] + i];
140826fbe8dcSKarl Rupp         for (j=0; j<bs; j++) idx_i[j] = itmp++;
14092d61bbb3SSatish Balay       }
14102d61bbb3SSatish Balay     }
14112d61bbb3SSatish Balay   }
14122d61bbb3SSatish Balay   PetscFunctionReturn(0);
14132d61bbb3SSatish Balay }
14142d61bbb3SSatish Balay 
14154a2ae208SSatish Balay #undef __FUNCT__
14164a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1417c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
14182d61bbb3SSatish Balay {
1419dfbe8321SBarry Smith   PetscErrorCode ierr;
1420606d414cSSatish Balay 
14212d61bbb3SSatish Balay   PetscFunctionBegin;
142205b42c5fSBarry Smith   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
142305b42c5fSBarry Smith   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
14242d61bbb3SSatish Balay   PetscFunctionReturn(0);
14252d61bbb3SSatish Balay }
14262d61bbb3SSatish Balay 
1427fca92195SBarry Smith extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1428fca92195SBarry Smith 
14294a2ae208SSatish Balay #undef __FUNCT__
14304a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ"
1431fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
14322d61bbb3SSatish Balay {
14332d61bbb3SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
14342d61bbb3SSatish Balay   Mat            C;
14356849ba73SBarry Smith   PetscErrorCode ierr;
1436d0f46423SBarry Smith   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1437c1ac3661SBarry Smith   PetscInt       *rows,*cols,bs2=a->bs2;
1438dd6ea824SBarry Smith   MatScalar      *array;
14392d61bbb3SSatish Balay 
14402d61bbb3SSatish Balay   PetscFunctionBegin;
1441e32f2f54SBarry Smith   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1442fc4dec0aSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1443c1ac3661SBarry Smith     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1444c1ac3661SBarry Smith     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
14452d61bbb3SSatish Balay 
14462d61bbb3SSatish Balay     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1447ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1448d0f46423SBarry Smith     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
14497adad957SLisandro Dalcin     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1450ecd8bba6SJed Brown     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1451606d414cSSatish Balay     ierr = PetscFree(col);CHKERRQ(ierr);
1452fc4dec0aSBarry Smith   } else {
1453fc4dec0aSBarry Smith     C = *B;
1454fc4dec0aSBarry Smith   }
1455fc4dec0aSBarry Smith 
1456fc4dec0aSBarry Smith   array = a->a;
1457fca92195SBarry Smith   ierr  = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
14582d61bbb3SSatish Balay   for (i=0; i<mbs; i++) {
14592d61bbb3SSatish Balay     cols[0] = i*bs;
14602d61bbb3SSatish Balay     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
14612d61bbb3SSatish Balay     len = ai[i+1] - ai[i];
14622d61bbb3SSatish Balay     for (j=0; j<len; j++) {
14632d61bbb3SSatish Balay       rows[0] = (*aj++)*bs;
14642d61bbb3SSatish Balay       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1465fca92195SBarry Smith       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
14662d61bbb3SSatish Balay       array += bs2;
14672d61bbb3SSatish Balay     }
14682d61bbb3SSatish Balay   }
1469fca92195SBarry Smith   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
14702d61bbb3SSatish Balay 
14712d61bbb3SSatish Balay   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14722d61bbb3SSatish Balay   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
14732d61bbb3SSatish Balay 
1474815cbec1SBarry Smith   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
14752d61bbb3SSatish Balay     *B = C;
14762d61bbb3SSatish Balay   } else {
1477eb6b5d47SBarry Smith     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
14782d61bbb3SSatish Balay   }
14792d61bbb3SSatish Balay   PetscFunctionReturn(0);
14802d61bbb3SSatish Balay }
14812d61bbb3SSatish Balay 
1482453d3561SHong Zhang #undef __FUNCT__
1483453d3561SHong Zhang #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1484453d3561SHong Zhang PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1485453d3561SHong Zhang {
1486453d3561SHong Zhang   PetscErrorCode ierr;
1487453d3561SHong Zhang   Mat            Btrans;
1488453d3561SHong Zhang 
1489453d3561SHong Zhang   PetscFunctionBegin;
1490453d3561SHong Zhang   *f   = PETSC_FALSE;
1491453d3561SHong Zhang   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1492453d3561SHong Zhang   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1493453d3561SHong Zhang   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1494453d3561SHong Zhang   PetscFunctionReturn(0);
1495453d3561SHong Zhang }
1496453d3561SHong Zhang 
14974a2ae208SSatish Balay #undef __FUNCT__
14984a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary"
14996849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
15002593348eSBarry Smith {
1501b6490206SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
15026849ba73SBarry Smith   PetscErrorCode ierr;
1503d0f46423SBarry Smith   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1504b24ad042SBarry Smith   int            fd;
150587828ca2SBarry Smith   PetscScalar    *aa;
1506ce6f0cecSBarry Smith   FILE           *file;
15072593348eSBarry Smith 
15083a40ed3dSBarry Smith   PetscFunctionBegin;
1509b0a32e0cSBarry Smith   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1510d0f46423SBarry Smith   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
15110700a824SBarry Smith   col_lens[0] = MAT_FILE_CLASSID;
15123b2fbd54SBarry Smith 
1513d0f46423SBarry Smith   col_lens[1] = A->rmap->N;
1514d0f46423SBarry Smith   col_lens[2] = A->cmap->n;
15157e67e3f9SSatish Balay   col_lens[3] = a->nz*bs2;
15162593348eSBarry Smith 
15172593348eSBarry Smith   /* store lengths of each row and write (including header) to file */
1518b6490206SBarry Smith   count = 0;
1519b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1520b6490206SBarry Smith     for (j=0; j<bs; j++) {
1521b6490206SBarry Smith       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1522b6490206SBarry Smith     }
15232593348eSBarry Smith   }
1524d0f46423SBarry Smith   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1525606d414cSSatish Balay   ierr = PetscFree(col_lens);CHKERRQ(ierr);
15262593348eSBarry Smith 
15272593348eSBarry Smith   /* store column indices (zero start index) */
1528c1ac3661SBarry Smith   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1529b6490206SBarry Smith   count = 0;
1530b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1531b6490206SBarry Smith     for (j=0; j<bs; j++) {
1532b6490206SBarry Smith       for (k=a->i[i]; k<a->i[i+1]; k++) {
1533b6490206SBarry Smith         for (l=0; l<bs; l++) {
1534b6490206SBarry Smith           jj[count++] = bs*a->j[k] + l;
15352593348eSBarry Smith         }
15362593348eSBarry Smith       }
1537b6490206SBarry Smith     }
1538b6490206SBarry Smith   }
15396f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1540606d414cSSatish Balay   ierr = PetscFree(jj);CHKERRQ(ierr);
15412593348eSBarry Smith 
15422593348eSBarry Smith   /* store nonzero values */
154387828ca2SBarry Smith   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1544b6490206SBarry Smith   count = 0;
1545b6490206SBarry Smith   for (i=0; i<a->mbs; i++) {
1546b6490206SBarry Smith     for (j=0; j<bs; j++) {
1547b6490206SBarry Smith       for (k=a->i[i]; k<a->i[i+1]; k++) {
1548b6490206SBarry Smith         for (l=0; l<bs; l++) {
15497e67e3f9SSatish Balay           aa[count++] = a->a[bs2*k + l*bs + j];
1550b6490206SBarry Smith         }
1551b6490206SBarry Smith       }
1552b6490206SBarry Smith     }
1553b6490206SBarry Smith   }
15546f69ff64SBarry Smith   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1555606d414cSSatish Balay   ierr = PetscFree(aa);CHKERRQ(ierr);
1556ce6f0cecSBarry Smith 
1557b0a32e0cSBarry Smith   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1558ce6f0cecSBarry Smith   if (file) {
1559d0f46423SBarry Smith     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1560ce6f0cecSBarry Smith   }
15613a40ed3dSBarry Smith   PetscFunctionReturn(0);
15622593348eSBarry Smith }
15632593348eSBarry Smith 
15644a2ae208SSatish Balay #undef __FUNCT__
15654a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
15666849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
15672593348eSBarry Smith {
1568b6490206SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1569dfbe8321SBarry Smith   PetscErrorCode    ierr;
1570d0f46423SBarry Smith   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1571f3ef73ceSBarry Smith   PetscViewerFormat format;
15722593348eSBarry Smith 
15733a40ed3dSBarry Smith   PetscFunctionBegin;
1574b0a32e0cSBarry Smith   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1575456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
157677431f27SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1577fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1578bcd9e38bSBarry Smith     Mat aij;
1579ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1580bcd9e38bSBarry Smith     ierr = MatView(aij,viewer);CHKERRQ(ierr);
15816bf464f9SBarry Smith     ierr = MatDestroy(&aij);CHKERRQ(ierr);
158204929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
158304929863SHong Zhang       PetscFunctionReturn(0);
1584fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
15867566de4bSShri Abhyankar     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
158744cd7ae7SLois Curfman McInnes     for (i=0; i<a->mbs; i++) {
158844cd7ae7SLois Curfman McInnes       for (j=0; j<bs; j++) {
158977431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
159044cd7ae7SLois Curfman McInnes         for (k=a->i[i]; k<a->i[i+1]; k++) {
159144cd7ae7SLois Curfman McInnes           for (l=0; l<bs; l++) {
1592aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
15930e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1594a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
15950e6d2581SBarry Smith                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15960e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1597a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
15980e6d2581SBarry Smith                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
15990e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1600a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16010ef38995SBarry Smith             }
160244cd7ae7SLois Curfman McInnes #else
16030ef38995SBarry Smith             if (a->a[bs2*k + l*bs + j] != 0.0) {
1604a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
16050ef38995SBarry Smith             }
160644cd7ae7SLois Curfman McInnes #endif
160744cd7ae7SLois Curfman McInnes           }
160844cd7ae7SLois Curfman McInnes         }
1609b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
161044cd7ae7SLois Curfman McInnes       }
161144cd7ae7SLois Curfman McInnes     }
1612d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
16130ef38995SBarry Smith   } else {
1614d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
16157566de4bSShri Abhyankar     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1616b6490206SBarry Smith     for (i=0; i<a->mbs; i++) {
1617b6490206SBarry Smith       for (j=0; j<bs; j++) {
161877431f27SBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1619b6490206SBarry Smith         for (k=a->i[i]; k<a->i[i+1]; k++) {
1620b6490206SBarry Smith           for (l=0; l<bs; l++) {
1621aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
16220e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1623a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
16240e6d2581SBarry Smith                                             PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16250e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1626a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
16270e6d2581SBarry Smith                                             PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
16280ef38995SBarry Smith             } else {
1629a83599f4SBarry Smith               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
163088685aaeSLois Curfman McInnes             }
163188685aaeSLois Curfman McInnes #else
1632a83599f4SBarry Smith             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
163388685aaeSLois Curfman McInnes #endif
16342593348eSBarry Smith           }
16352593348eSBarry Smith         }
1636b0a32e0cSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
16372593348eSBarry Smith       }
16382593348eSBarry Smith     }
1639d00279f6SBarry Smith     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1640b6490206SBarry Smith   }
1641b0a32e0cSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
16423a40ed3dSBarry Smith   PetscFunctionReturn(0);
16432593348eSBarry Smith }
16442593348eSBarry Smith 
16459804daf3SBarry Smith #include <petscdraw.h>
16464a2ae208SSatish Balay #undef __FUNCT__
16474a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
16486849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
16493270192aSSatish Balay {
165077ed5343SBarry Smith   Mat               A = (Mat) Aa;
16513270192aSSatish Balay   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
16526849ba73SBarry Smith   PetscErrorCode    ierr;
1653d0f46423SBarry Smith   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
16540e6d2581SBarry Smith   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
16553f1db9ecSBarry Smith   MatScalar         *aa;
1656b0a32e0cSBarry Smith   PetscViewer       viewer;
1657b3e7f47fSJed Brown   PetscViewerFormat format;
16583270192aSSatish Balay 
16593a40ed3dSBarry Smith   PetscFunctionBegin;
166077ed5343SBarry Smith   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1661b3e7f47fSJed Brown   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
166277ed5343SBarry Smith 
1663b0a32e0cSBarry Smith   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
166477ed5343SBarry Smith 
16653270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1666b3e7f47fSJed Brown 
1667b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1668b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
16693270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16703270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1671d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16723270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16733270192aSSatish Balay         aa  = a->a + j*bs2;
16743270192aSSatish Balay         for (k=0; k<bs; k++) {
16753270192aSSatish Balay           for (l=0; l<bs; l++) {
16760e6d2581SBarry Smith             if (PetscRealPart(*aa++) >=  0.) continue;
1677b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16783270192aSSatish Balay           }
16793270192aSSatish Balay         }
16803270192aSSatish Balay       }
16813270192aSSatish Balay     }
1682b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
16833270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16843270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1685d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
16863270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
16873270192aSSatish Balay         aa  = a->a + j*bs2;
16883270192aSSatish Balay         for (k=0; k<bs; k++) {
16893270192aSSatish Balay           for (l=0; l<bs; l++) {
16900e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
1691b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
16923270192aSSatish Balay           }
16933270192aSSatish Balay         }
16943270192aSSatish Balay       }
16953270192aSSatish Balay     }
1696b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
16973270192aSSatish Balay     for (i=0,row=0; i<mbs; i++,row+=bs) {
16983270192aSSatish Balay       for (j=a->i[i]; j<a->i[i+1]; j++) {
1699d0f46423SBarry Smith         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
17003270192aSSatish Balay         x_l = a->j[j]*bs; x_r = x_l + 1.0;
17013270192aSSatish Balay         aa  = a->a + j*bs2;
17023270192aSSatish Balay         for (k=0; k<bs; k++) {
17033270192aSSatish Balay           for (l=0; l<bs; l++) {
17040e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
1705b0a32e0cSBarry Smith             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
17063270192aSSatish Balay           }
17073270192aSSatish Balay         }
17083270192aSSatish Balay       }
17093270192aSSatish Balay     }
1710b3e7f47fSJed Brown   } else {
1711b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1712b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1713b3e7f47fSJed Brown     PetscDraw popup;
1714b3e7f47fSJed Brown     PetscReal scale,maxv = 0.0;
1715b3e7f47fSJed Brown 
1716b3e7f47fSJed Brown     for (i=0; i<a->nz*a->bs2; i++) {
1717b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1718b3e7f47fSJed Brown     }
1719b3e7f47fSJed Brown     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1720b3e7f47fSJed Brown     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
172126fbe8dcSKarl Rupp     if (popup) {
172226fbe8dcSKarl Rupp       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
172326fbe8dcSKarl Rupp     }
1724b3e7f47fSJed Brown     for (i=0,row=0; i<mbs; i++,row+=bs) {
1725b3e7f47fSJed Brown       for (j=a->i[i]; j<a->i[i+1]; j++) {
1726b3e7f47fSJed Brown         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1727b3e7f47fSJed Brown         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1728b3e7f47fSJed Brown         aa  = a->a + j*bs2;
1729b3e7f47fSJed Brown         for (k=0; k<bs; k++) {
1730b3e7f47fSJed Brown           for (l=0; l<bs; l++) {
1731b3e7f47fSJed Brown             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1732b3e7f47fSJed Brown             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1733b3e7f47fSJed Brown           }
1734b3e7f47fSJed Brown         }
1735b3e7f47fSJed Brown       }
1736b3e7f47fSJed Brown     }
1737b3e7f47fSJed Brown   }
173877ed5343SBarry Smith   PetscFunctionReturn(0);
173977ed5343SBarry Smith }
17403270192aSSatish Balay 
17414a2ae208SSatish Balay #undef __FUNCT__
17424a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw"
17436849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
174477ed5343SBarry Smith {
1745dfbe8321SBarry Smith   PetscErrorCode ierr;
17460e6d2581SBarry Smith   PetscReal      xl,yl,xr,yr,w,h;
1747b0a32e0cSBarry Smith   PetscDraw      draw;
1748ace3abfcSBarry Smith   PetscBool      isnull;
17493270192aSSatish Balay 
175077ed5343SBarry Smith   PetscFunctionBegin;
1751b0a32e0cSBarry Smith   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1752b0a32e0cSBarry Smith   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
175377ed5343SBarry Smith 
175477ed5343SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1755d0f46423SBarry Smith   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
175677ed5343SBarry Smith   xr  += w;    yr += h;  xl = -w;     yl = -h;
1757b0a32e0cSBarry Smith   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1758b0a32e0cSBarry Smith   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
17590298fd71SBarry Smith   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
17603a40ed3dSBarry Smith   PetscFunctionReturn(0);
17613270192aSSatish Balay }
17623270192aSSatish Balay 
17634a2ae208SSatish Balay #undef __FUNCT__
17644a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ"
1765dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
17662593348eSBarry Smith {
1767dfbe8321SBarry Smith   PetscErrorCode ierr;
1768ace3abfcSBarry Smith   PetscBool      iascii,isbinary,isdraw;
17692593348eSBarry Smith 
17703a40ed3dSBarry Smith   PetscFunctionBegin;
1771251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1772251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1773251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
177432077d6dSBarry Smith   if (iascii) {
17753a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
17760f5bd95cSBarry Smith   } else if (isbinary) {
17773a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
17780f5bd95cSBarry Smith   } else if (isdraw) {
17793a40ed3dSBarry Smith     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
17805cd90555SBarry Smith   } else {
1781a5e6ed63SBarry Smith     Mat B;
1782ceb03754SKris Buschelman     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1783a5e6ed63SBarry Smith     ierr = MatView(B,viewer);CHKERRQ(ierr);
17846bf464f9SBarry Smith     ierr = MatDestroy(&B);CHKERRQ(ierr);
17852593348eSBarry Smith   }
17863a40ed3dSBarry Smith   PetscFunctionReturn(0);
17872593348eSBarry Smith }
1788b6490206SBarry Smith 
1789cd0e1443SSatish Balay 
17904a2ae208SSatish Balay #undef __FUNCT__
17914a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ"
1792c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1793cd0e1443SSatish Balay {
1794cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1795c1ac3661SBarry Smith   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1796c1ac3661SBarry Smith   PetscInt    *ai = a->i,*ailen = a->ilen;
1797d0f46423SBarry Smith   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
179897e567efSBarry Smith   MatScalar   *ap,*aa = a->a;
1799cd0e1443SSatish Balay 
18003a40ed3dSBarry Smith   PetscFunctionBegin;
18012d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over rows */
1802cd0e1443SSatish Balay     row = im[k]; brow = row/bs;
1803e32f2f54SBarry Smith     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1804e32f2f54SBarry Smith     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
18052d61bbb3SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
18062c3acbe9SBarry Smith     nrow = ailen[brow];
18072d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over columns */
1808e32f2f54SBarry Smith       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1809e32f2f54SBarry Smith       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
18102d61bbb3SSatish Balay       col  = in[l];
18112d61bbb3SSatish Balay       bcol = col/bs;
18122d61bbb3SSatish Balay       cidx = col%bs;
18132d61bbb3SSatish Balay       ridx = row%bs;
18142d61bbb3SSatish Balay       high = nrow;
18152d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
18162d61bbb3SSatish Balay       while (high-low > 5) {
1817cd0e1443SSatish Balay         t = (low+high)/2;
1818cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
1819cd0e1443SSatish Balay         else             low  = t;
1820cd0e1443SSatish Balay       }
1821cd0e1443SSatish Balay       for (i=low; i<high; i++) {
1822cd0e1443SSatish Balay         if (rp[i] > bcol) break;
1823cd0e1443SSatish Balay         if (rp[i] == bcol) {
18242d61bbb3SSatish Balay           *v++ = ap[bs2*i+bs*cidx+ridx];
18252d61bbb3SSatish Balay           goto finished;
1826cd0e1443SSatish Balay         }
1827cd0e1443SSatish Balay       }
182897e567efSBarry Smith       *v++ = 0.0;
18292d61bbb3SSatish Balay finished:;
1830cd0e1443SSatish Balay     }
1831cd0e1443SSatish Balay   }
18323a40ed3dSBarry Smith   PetscFunctionReturn(0);
1833cd0e1443SSatish Balay }
1834cd0e1443SSatish Balay 
18354a2ae208SSatish Balay #undef __FUNCT__
18364a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1837dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
183892c4ed94SBarry Smith {
183992c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1840e2ee6c50SBarry Smith   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1841c1ac3661SBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
18426849ba73SBarry Smith   PetscErrorCode    ierr;
1843d0f46423SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1844ace3abfcSBarry Smith   PetscBool         roworiented=a->roworiented;
1845dd6ea824SBarry Smith   const PetscScalar *value     = v;
1846f15d580aSBarry Smith   MatScalar         *ap,*aa = a->a,*bap;
184792c4ed94SBarry Smith 
18483a40ed3dSBarry Smith   PetscFunctionBegin;
18490e324ae4SSatish Balay   if (roworiented) {
18500e324ae4SSatish Balay     stepval = (n-1)*bs;
18510e324ae4SSatish Balay   } else {
18520e324ae4SSatish Balay     stepval = (m-1)*bs;
18530e324ae4SSatish Balay   }
185492c4ed94SBarry Smith   for (k=0; k<m; k++) { /* loop over added rows */
185592c4ed94SBarry Smith     row = im[k];
18565ef9f2a5SBarry Smith     if (row < 0) continue;
18572515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1858e32f2f54SBarry Smith     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
185992c4ed94SBarry Smith #endif
186092c4ed94SBarry Smith     rp   = aj + ai[row];
186192c4ed94SBarry Smith     ap   = aa + bs2*ai[row];
186292c4ed94SBarry Smith     rmax = imax[row];
186392c4ed94SBarry Smith     nrow = ailen[row];
186492c4ed94SBarry Smith     low  = 0;
1865c71e6ed7SBarry Smith     high = nrow;
186692c4ed94SBarry Smith     for (l=0; l<n; l++) { /* loop over added columns */
18675ef9f2a5SBarry Smith       if (in[l] < 0) continue;
18682515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1869e32f2f54SBarry Smith       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
187092c4ed94SBarry Smith #endif
187192c4ed94SBarry Smith       col = in[l];
187292c4ed94SBarry Smith       if (roworiented) {
187353ef36baSBarry Smith         value = v + (k*(stepval+bs) + l)*bs;
18740e324ae4SSatish Balay       } else {
187553ef36baSBarry Smith         value = v + (l*(stepval+bs) + k)*bs;
187692c4ed94SBarry Smith       }
187726fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
187826fbe8dcSKarl Rupp       else high = nrow;
1879e2ee6c50SBarry Smith       lastcol = col;
188092c4ed94SBarry Smith       while (high-low > 7) {
188192c4ed94SBarry Smith         t = (low+high)/2;
188292c4ed94SBarry Smith         if (rp[t] > col) high = t;
188392c4ed94SBarry Smith         else             low  = t;
188492c4ed94SBarry Smith       }
188592c4ed94SBarry Smith       for (i=low; i<high; i++) {
188692c4ed94SBarry Smith         if (rp[i] > col) break;
188792c4ed94SBarry Smith         if (rp[i] == col) {
18888a84c255SSatish Balay           bap = ap +  bs2*i;
18890e324ae4SSatish Balay           if (roworiented) {
18908a84c255SSatish Balay             if (is == ADD_VALUES) {
1891dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1892dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18938a84c255SSatish Balay                   bap[jj] += *value++;
1894dd9472c6SBarry Smith                 }
1895dd9472c6SBarry Smith               }
18960e324ae4SSatish Balay             } else {
1897dd9472c6SBarry Smith               for (ii=0; ii<bs; ii++,value+=stepval) {
1898dd9472c6SBarry Smith                 for (jj=ii; jj<bs2; jj+=bs) {
18990e324ae4SSatish Balay                   bap[jj] = *value++;
19008a84c255SSatish Balay                 }
1901dd9472c6SBarry Smith               }
1902dd9472c6SBarry Smith             }
19030e324ae4SSatish Balay           } else {
19040e324ae4SSatish Balay             if (is == ADD_VALUES) {
190553ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1906dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
190753ef36baSBarry Smith                   bap[jj] += value[jj];
1908dd9472c6SBarry Smith                 }
190953ef36baSBarry Smith                 bap += bs;
1910dd9472c6SBarry Smith               }
19110e324ae4SSatish Balay             } else {
191253ef36baSBarry Smith               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1913dd9472c6SBarry Smith                 for (jj=0; jj<bs; jj++) {
191453ef36baSBarry Smith                   bap[jj]  = value[jj];
19150e324ae4SSatish Balay                 }
191653ef36baSBarry Smith                 bap += bs;
19178a84c255SSatish Balay               }
1918dd9472c6SBarry Smith             }
1919dd9472c6SBarry Smith           }
1920f1241b54SBarry Smith           goto noinsert2;
192192c4ed94SBarry Smith         }
192292c4ed94SBarry Smith       }
192389280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
1924e32f2f54SBarry Smith       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1925fef13f97SBarry Smith       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1926c03d1d03SSatish Balay       N = nrow++ - 1; high++;
192792c4ed94SBarry Smith       /* shift up all the later entries in this row */
192892c4ed94SBarry Smith       for (ii=N; ii>=i; ii--) {
192992c4ed94SBarry Smith         rp[ii+1] = rp[ii];
1930549d3d68SSatish Balay         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
193192c4ed94SBarry Smith       }
1932549d3d68SSatish Balay       if (N >= i) {
1933549d3d68SSatish Balay         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1934549d3d68SSatish Balay       }
193592c4ed94SBarry Smith       rp[i] = col;
19368a84c255SSatish Balay       bap   = ap +  bs2*i;
19370e324ae4SSatish Balay       if (roworiented) {
1938dd9472c6SBarry Smith         for (ii=0; ii<bs; ii++,value+=stepval) {
1939dd9472c6SBarry Smith           for (jj=ii; jj<bs2; jj+=bs) {
19400e324ae4SSatish Balay             bap[jj] = *value++;
1941dd9472c6SBarry Smith           }
1942dd9472c6SBarry Smith         }
19430e324ae4SSatish Balay       } else {
1944dd9472c6SBarry Smith         for (ii=0; ii<bs; ii++,value+=stepval) {
1945dd9472c6SBarry Smith           for (jj=0; jj<bs; jj++) {
19460e324ae4SSatish Balay             *bap++ = *value++;
19470e324ae4SSatish Balay           }
1948dd9472c6SBarry Smith         }
1949dd9472c6SBarry Smith       }
1950f1241b54SBarry Smith noinsert2:;
195192c4ed94SBarry Smith       low = i;
195292c4ed94SBarry Smith     }
195392c4ed94SBarry Smith     ailen[row] = nrow;
195492c4ed94SBarry Smith   }
19553a40ed3dSBarry Smith   PetscFunctionReturn(0);
195692c4ed94SBarry Smith }
195726e093fcSHong Zhang 
19584a2ae208SSatish Balay #undef __FUNCT__
19594a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1960dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1961584200bdSSatish Balay {
1962584200bdSSatish Balay   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1963c1ac3661SBarry Smith   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1964d0f46423SBarry Smith   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
19656849ba73SBarry Smith   PetscErrorCode ierr;
1966c1ac3661SBarry Smith   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
19673f1db9ecSBarry Smith   MatScalar      *aa  = a->a,*ap;
19683447b6efSHong Zhang   PetscReal      ratio=0.6;
1969584200bdSSatish Balay 
19703a40ed3dSBarry Smith   PetscFunctionBegin;
19713a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1972584200bdSSatish Balay 
197343ee02c3SBarry Smith   if (m) rmax = ailen[0];
1974584200bdSSatish Balay   for (i=1; i<mbs; i++) {
1975584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
1976584200bdSSatish Balay     fshift += imax[i-1] - ailen[i-1];
1977d402145bSBarry Smith     rmax    = PetscMax(rmax,ailen[i]);
1978584200bdSSatish Balay     if (fshift) {
1979a7c10996SSatish Balay       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1980584200bdSSatish Balay       N  = ailen[i];
1981584200bdSSatish Balay       for (j=0; j<N; j++) {
1982584200bdSSatish Balay         ip[j-fshift] = ip[j];
198326fbe8dcSKarl Rupp 
1984549d3d68SSatish Balay         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1985584200bdSSatish Balay       }
1986584200bdSSatish Balay     }
1987584200bdSSatish Balay     ai[i] = ai[i-1] + ailen[i-1];
1988584200bdSSatish Balay   }
1989584200bdSSatish Balay   if (mbs) {
1990584200bdSSatish Balay     fshift += imax[mbs-1] - ailen[mbs-1];
1991584200bdSSatish Balay     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1992584200bdSSatish Balay   }
1993584200bdSSatish Balay   /* reset ilen and imax for each row */
1994584200bdSSatish Balay   for (i=0; i<mbs; i++) {
1995584200bdSSatish Balay     ailen[i] = imax[i] = ai[i+1] - ai[i];
1996584200bdSSatish Balay   }
1997a7c10996SSatish Balay   a->nz = ai[mbs];
1998584200bdSSatish Balay 
1999584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2000b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2001584200bdSSatish Balay   if (fshift && a->diag) {
2002606d414cSSatish Balay     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
200352e6d16bSBarry Smith     ierr    = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2004584200bdSSatish Balay     a->diag = 0;
2005584200bdSSatish Balay   }
200665e19b50SBarry Smith   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2007d0f46423SBarry Smith   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2008ae15b995SBarry Smith   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2009ae15b995SBarry Smith   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
201026fbe8dcSKarl Rupp 
20118e58a170SBarry Smith   A->info.mallocs    += a->reallocs;
2012e2f3b5e9SSatish Balay   a->reallocs         = 0;
20130e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2014cf4441caSHong Zhang 
2015cd6b891eSBarry Smith   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
201626fbe8dcSKarl Rupp 
201788e51ccdSHong Zhang   A->same_nonzero = PETSC_TRUE;
20183a40ed3dSBarry Smith   PetscFunctionReturn(0);
2019584200bdSSatish Balay }
2020584200bdSSatish Balay 
2021bea157c4SSatish Balay /*
2022bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2023bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2024bea157c4SSatish Balay    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2025bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2026bea157c4SSatish Balay */
20274a2ae208SSatish Balay #undef __FUNCT__
20284a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2029c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2030d9b7c43dSSatish Balay {
2031c1ac3661SBarry Smith   PetscInt  i,j,k,row;
2032ace3abfcSBarry Smith   PetscBool flg;
20333a40ed3dSBarry Smith 
2034433994e6SBarry Smith   PetscFunctionBegin;
2035bea157c4SSatish Balay   for (i=0,j=0; i<n; j++) {
2036bea157c4SSatish Balay     row = idx[i];
2037bea157c4SSatish Balay     if (row%bs!=0) { /* Not the begining of a block */
2038bea157c4SSatish Balay       sizes[j] = 1;
2039bea157c4SSatish Balay       i++;
2040e4fda26cSSatish Balay     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2041bea157c4SSatish Balay       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2042bea157c4SSatish Balay       i++;
2043bea157c4SSatish Balay     } else { /* Begining of the block, so check if the complete block exists */
2044bea157c4SSatish Balay       flg = PETSC_TRUE;
2045bea157c4SSatish Balay       for (k=1; k<bs; k++) {
2046bea157c4SSatish Balay         if (row+k != idx[i+k]) { /* break in the block */
2047bea157c4SSatish Balay           flg = PETSC_FALSE;
2048bea157c4SSatish Balay           break;
2049d9b7c43dSSatish Balay         }
2050bea157c4SSatish Balay       }
2051abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2052bea157c4SSatish Balay         sizes[j] = bs;
2053bea157c4SSatish Balay         i       += bs;
2054bea157c4SSatish Balay       } else {
2055bea157c4SSatish Balay         sizes[j] = 1;
2056bea157c4SSatish Balay         i++;
2057bea157c4SSatish Balay       }
2058bea157c4SSatish Balay     }
2059bea157c4SSatish Balay   }
2060bea157c4SSatish Balay   *bs_max = j;
20613a40ed3dSBarry Smith   PetscFunctionReturn(0);
2062d9b7c43dSSatish Balay }
2063d9b7c43dSSatish Balay 
20644a2ae208SSatish Balay #undef __FUNCT__
20654a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ"
20662b40b63fSBarry Smith PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2067d9b7c43dSSatish Balay {
2068d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2069dfbe8321SBarry Smith   PetscErrorCode    ierr;
2070f4df32b1SMatthew Knepley   PetscInt          i,j,k,count,*rows;
2071d0f46423SBarry Smith   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
207287828ca2SBarry Smith   PetscScalar       zero = 0.0;
20733f1db9ecSBarry Smith   MatScalar         *aa;
207497b48c8fSBarry Smith   const PetscScalar *xx;
207597b48c8fSBarry Smith   PetscScalar       *bb;
2076d9b7c43dSSatish Balay 
20773a40ed3dSBarry Smith   PetscFunctionBegin;
207897b48c8fSBarry Smith   /* fix right hand side if needed */
207997b48c8fSBarry Smith   if (x && b) {
208097b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
208197b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
208297b48c8fSBarry Smith     for (i=0; i<is_n; i++) {
208397b48c8fSBarry Smith       bb[is_idx[i]] = diag*xx[is_idx[i]];
208497b48c8fSBarry Smith     }
208597b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
208697b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
208797b48c8fSBarry Smith   }
208897b48c8fSBarry Smith 
2089d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2090bea157c4SSatish Balay   /* allocate memory for rows,sizes */
2091fca92195SBarry Smith   ierr = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2092bea157c4SSatish Balay 
2093563b5814SBarry Smith   /* copy IS values to rows, and sort them */
209426fbe8dcSKarl Rupp   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2095bea157c4SSatish Balay   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
209697b48c8fSBarry Smith 
2097a9817697SBarry Smith   if (baij->keepnonzeropattern) {
209826fbe8dcSKarl Rupp     for (i=0; i<is_n; i++) sizes[i] = 1;
2099dffd3267SBarry Smith     bs_max          = is_n;
210088e51ccdSHong Zhang     A->same_nonzero = PETSC_TRUE;
2101dffd3267SBarry Smith   } else {
2102bea157c4SSatish Balay     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
210326fbe8dcSKarl Rupp 
210488e51ccdSHong Zhang     A->same_nonzero = PETSC_FALSE;
2105dffd3267SBarry Smith   }
2106bea157c4SSatish Balay 
2107bea157c4SSatish Balay   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2108bea157c4SSatish Balay     row = rows[j];
2109e32f2f54SBarry Smith     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2110bea157c4SSatish Balay     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2111b31fbe3bSSatish Balay     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2112a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2113d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2114bea157c4SSatish Balay         if (baij->ilen[row/bs] > 0) {
2115bea157c4SSatish Balay           baij->ilen[row/bs]       = 1;
2116bea157c4SSatish Balay           baij->j[baij->i[row/bs]] = row/bs;
211726fbe8dcSKarl Rupp 
2118bea157c4SSatish Balay           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2119a07cd24cSSatish Balay         }
2120563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
2121bea157c4SSatish Balay         for (k=0; k<bs; k++) {
2122f4df32b1SMatthew Knepley           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2123bea157c4SSatish Balay         }
2124f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2125bea157c4SSatish Balay         baij->ilen[row/bs] = 0;
2126f4df32b1SMatthew Knepley       } /* end (diag == 0.0) */
2127bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
2128aa482453SBarry Smith #if defined(PETSC_USE_DEBUG)
2129e32f2f54SBarry Smith       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2130bea157c4SSatish Balay #endif
2131bea157c4SSatish Balay       for (k=0; k<count; k++) {
2132d9b7c43dSSatish Balay         aa[0] =  zero;
2133d9b7c43dSSatish Balay         aa   += bs;
2134d9b7c43dSSatish Balay       }
2135d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2136f4df32b1SMatthew Knepley         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2137d9b7c43dSSatish Balay       }
2138d9b7c43dSSatish Balay     }
2139bea157c4SSatish Balay   }
2140bea157c4SSatish Balay 
2141fca92195SBarry Smith   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
21429a8dea36SBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
21433a40ed3dSBarry Smith   PetscFunctionReturn(0);
2144d9b7c43dSSatish Balay }
21451c351548SSatish Balay 
21464a2ae208SSatish Balay #undef __FUNCT__
214797b48c8fSBarry Smith #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
214897b48c8fSBarry Smith PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
214997b48c8fSBarry Smith {
215097b48c8fSBarry Smith   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
215197b48c8fSBarry Smith   PetscErrorCode    ierr;
215297b48c8fSBarry Smith   PetscInt          i,j,k,count;
215397b48c8fSBarry Smith   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
215497b48c8fSBarry Smith   PetscScalar       zero = 0.0;
215597b48c8fSBarry Smith   MatScalar         *aa;
215697b48c8fSBarry Smith   const PetscScalar *xx;
215797b48c8fSBarry Smith   PetscScalar       *bb;
215856777dd2SBarry Smith   PetscBool         *zeroed,vecs = PETSC_FALSE;
215997b48c8fSBarry Smith 
216097b48c8fSBarry Smith   PetscFunctionBegin;
216197b48c8fSBarry Smith   /* fix right hand side if needed */
216297b48c8fSBarry Smith   if (x && b) {
216397b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
216497b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
216556777dd2SBarry Smith     vecs = PETSC_TRUE;
216697b48c8fSBarry Smith   }
216797b48c8fSBarry Smith   A->same_nonzero = PETSC_TRUE;
216897b48c8fSBarry Smith 
216997b48c8fSBarry Smith   /* zero the columns */
217097b48c8fSBarry Smith   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
217197b48c8fSBarry Smith   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
217297b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
217397b48c8fSBarry Smith     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
217497b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
217597b48c8fSBarry Smith   }
217697b48c8fSBarry Smith   for (i=0; i<A->rmap->N; i++) {
217797b48c8fSBarry Smith     if (!zeroed[i]) {
217897b48c8fSBarry Smith       row = i/bs;
217997b48c8fSBarry Smith       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
218097b48c8fSBarry Smith         for (k=0; k<bs; k++) {
218197b48c8fSBarry Smith           col = bs*baij->j[j] + k;
218297b48c8fSBarry Smith           if (zeroed[col]) {
218397b48c8fSBarry Smith             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
218456777dd2SBarry Smith             if (vecs) bb[i] -= aa[0]*xx[col];
218597b48c8fSBarry Smith             aa[0] = 0.0;
218697b48c8fSBarry Smith           }
218797b48c8fSBarry Smith         }
218897b48c8fSBarry Smith       }
218956777dd2SBarry Smith     } else if (vecs) bb[i] = diag*xx[i];
219097b48c8fSBarry Smith   }
219197b48c8fSBarry Smith   ierr = PetscFree(zeroed);CHKERRQ(ierr);
219256777dd2SBarry Smith   if (vecs) {
219356777dd2SBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
219456777dd2SBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
219556777dd2SBarry Smith   }
219697b48c8fSBarry Smith 
219797b48c8fSBarry Smith   /* zero the rows */
219897b48c8fSBarry Smith   for (i=0; i<is_n; i++) {
219997b48c8fSBarry Smith     row   = is_idx[i];
220097b48c8fSBarry Smith     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
220197b48c8fSBarry Smith     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
220297b48c8fSBarry Smith     for (k=0; k<count; k++) {
220397b48c8fSBarry Smith       aa[0] =  zero;
220497b48c8fSBarry Smith       aa   += bs;
220597b48c8fSBarry Smith     }
2206d4a378daSJed Brown     if (diag != (PetscScalar)0.0) {
220797b48c8fSBarry Smith       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
220897b48c8fSBarry Smith     }
220997b48c8fSBarry Smith   }
221097b48c8fSBarry Smith   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
221197b48c8fSBarry Smith   PetscFunctionReturn(0);
221297b48c8fSBarry Smith }
221397b48c8fSBarry Smith 
221497b48c8fSBarry Smith #undef __FUNCT__
22154a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ"
2216c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
22172d61bbb3SSatish Balay {
22182d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2219e2ee6c50SBarry Smith   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2220c1ac3661SBarry Smith   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2221d0f46423SBarry Smith   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
22226849ba73SBarry Smith   PetscErrorCode ierr;
2223c1ac3661SBarry Smith   PetscInt       ridx,cidx,bs2=a->bs2;
2224ace3abfcSBarry Smith   PetscBool      roworiented=a->roworiented;
22253f1db9ecSBarry Smith   MatScalar      *ap,value,*aa=a->a,*bap;
22262d61bbb3SSatish Balay 
22272d61bbb3SSatish Balay   PetscFunctionBegin;
222871fd2e92SBarry Smith   if (v) PetscValidScalarPointer(v,6);
22292d61bbb3SSatish Balay   for (k=0; k<m; k++) { /* loop over added rows */
2230085a36d4SBarry Smith     row  = im[k];
2231085a36d4SBarry Smith     brow = row/bs;
22325ef9f2a5SBarry Smith     if (row < 0) continue;
22332515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
2234e32f2f54SBarry Smith     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
22352d61bbb3SSatish Balay #endif
22362d61bbb3SSatish Balay     rp   = aj + ai[brow];
22372d61bbb3SSatish Balay     ap   = aa + bs2*ai[brow];
22382d61bbb3SSatish Balay     rmax = imax[brow];
22392d61bbb3SSatish Balay     nrow = ailen[brow];
22402d61bbb3SSatish Balay     low  = 0;
2241c71e6ed7SBarry Smith     high = nrow;
22422d61bbb3SSatish Balay     for (l=0; l<n; l++) { /* loop over added columns */
22435ef9f2a5SBarry Smith       if (in[l] < 0) continue;
22442515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
2245e32f2f54SBarry Smith       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
22462d61bbb3SSatish Balay #endif
22472d61bbb3SSatish Balay       col  = in[l]; bcol = col/bs;
22482d61bbb3SSatish Balay       ridx = row % bs; cidx = col % bs;
22492d61bbb3SSatish Balay       if (roworiented) {
22505ef9f2a5SBarry Smith         value = v[l + k*n];
22512d61bbb3SSatish Balay       } else {
22522d61bbb3SSatish Balay         value = v[k + l*m];
22532d61bbb3SSatish Balay       }
22547cd84e04SBarry Smith       if (col <= lastcol) low = 0; else high = nrow;
2255e2ee6c50SBarry Smith       lastcol = col;
22562d61bbb3SSatish Balay       while (high-low > 7) {
22572d61bbb3SSatish Balay         t = (low+high)/2;
22582d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
22592d61bbb3SSatish Balay         else              low  = t;
22602d61bbb3SSatish Balay       }
22612d61bbb3SSatish Balay       for (i=low; i<high; i++) {
22622d61bbb3SSatish Balay         if (rp[i] > bcol) break;
22632d61bbb3SSatish Balay         if (rp[i] == bcol) {
22642d61bbb3SSatish Balay           bap = ap +  bs2*i + bs*cidx + ridx;
22652d61bbb3SSatish Balay           if (is == ADD_VALUES) *bap += value;
22662d61bbb3SSatish Balay           else                  *bap  = value;
22672d61bbb3SSatish Balay           goto noinsert1;
22682d61bbb3SSatish Balay         }
22692d61bbb3SSatish Balay       }
22702d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
2271e32f2f54SBarry Smith       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2272fef13f97SBarry Smith       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2273c03d1d03SSatish Balay       N = nrow++ - 1; high++;
22742d61bbb3SSatish Balay       /* shift up all the later entries in this row */
22752d61bbb3SSatish Balay       for (ii=N; ii>=i; ii--) {
22762d61bbb3SSatish Balay         rp[ii+1] = rp[ii];
2277549d3d68SSatish Balay         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
22782d61bbb3SSatish Balay       }
2279549d3d68SSatish Balay       if (N>=i) {
2280549d3d68SSatish Balay         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2281549d3d68SSatish Balay       }
22822d61bbb3SSatish Balay       rp[i]                      = bcol;
22832d61bbb3SSatish Balay       ap[bs2*i + bs*cidx + ridx] = value;
2284085a36d4SBarry Smith       a->nz++;
22852d61bbb3SSatish Balay noinsert1:;
22862d61bbb3SSatish Balay       low = i;
22872d61bbb3SSatish Balay     }
22882d61bbb3SSatish Balay     ailen[brow] = nrow;
22892d61bbb3SSatish Balay   }
229088e51ccdSHong Zhang   A->same_nonzero = PETSC_FALSE;
22912d61bbb3SSatish Balay   PetscFunctionReturn(0);
22922d61bbb3SSatish Balay }
22932d61bbb3SSatish Balay 
22944a2ae208SSatish Balay #undef __FUNCT__
22954a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ"
22960481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
22972d61bbb3SSatish Balay {
22982d61bbb3SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
22992d61bbb3SSatish Balay   Mat            outA;
2300dfbe8321SBarry Smith   PetscErrorCode ierr;
2301ace3abfcSBarry Smith   PetscBool      row_identity,col_identity;
23022d61bbb3SSatish Balay 
23032d61bbb3SSatish Balay   PetscFunctionBegin;
2304e32f2f54SBarry Smith   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2305667159a5SBarry Smith   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2306667159a5SBarry Smith   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2307f23aa3ddSBarry Smith   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
23082d61bbb3SSatish Balay 
23092d61bbb3SSatish Balay   outA            = inA;
2310d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
23112d61bbb3SSatish Balay 
2312c4992f7dSBarry Smith   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2313cf242676SKris Buschelman 
2314c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
23156bf464f9SBarry Smith   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2316c3122656SLisandro Dalcin   a->row = row;
2317c38d4ed2SBarry Smith   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
23186bf464f9SBarry Smith   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2319c3122656SLisandro Dalcin   a->col = col;
2320c38d4ed2SBarry Smith 
2321c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
23226bf464f9SBarry Smith   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
23234c49b128SBarry Smith   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
232452e6d16bSBarry Smith   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2325c38d4ed2SBarry Smith 
2326ace3abfcSBarry Smith   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2327c38d4ed2SBarry Smith   if (!a->solve_work) {
2328d0f46423SBarry Smith     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2329d0f46423SBarry Smith     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2330c38d4ed2SBarry Smith   }
2331719d5645SBarry Smith   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
23322d61bbb3SSatish Balay   PetscFunctionReturn(0);
23332d61bbb3SSatish Balay }
2334d9b7c43dSSatish Balay 
23354a2ae208SSatish Balay #undef __FUNCT__
23364a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
23377087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
233827a8da17SBarry Smith {
233927a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2340bdb1c0e1SJed Brown   PetscInt    i,nz,mbs;
234127a8da17SBarry Smith 
234227a8da17SBarry Smith   PetscFunctionBegin;
2343b32cb4a7SJed Brown   nz  = baij->maxnz;
2344bdb1c0e1SJed Brown   mbs = baij->mbs;
234527a8da17SBarry Smith   for (i=0; i<nz; i++) {
234627a8da17SBarry Smith     baij->j[i] = indices[i];
234727a8da17SBarry Smith   }
234827a8da17SBarry Smith   baij->nz = nz;
2349bdb1c0e1SJed Brown   for (i=0; i<mbs; i++) {
235027a8da17SBarry Smith     baij->ilen[i] = baij->imax[i];
235127a8da17SBarry Smith   }
235227a8da17SBarry Smith   PetscFunctionReturn(0);
235327a8da17SBarry Smith }
235427a8da17SBarry Smith 
23554a2ae208SSatish Balay #undef __FUNCT__
23564a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
235727a8da17SBarry Smith /*@
235827a8da17SBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
235927a8da17SBarry Smith        in the matrix.
236027a8da17SBarry Smith 
236127a8da17SBarry Smith   Input Parameters:
236227a8da17SBarry Smith +  mat - the SeqBAIJ matrix
236327a8da17SBarry Smith -  indices - the column indices
236427a8da17SBarry Smith 
236515091d37SBarry Smith   Level: advanced
236615091d37SBarry Smith 
236727a8da17SBarry Smith   Notes:
236827a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
236927a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
237027a8da17SBarry Smith   of the MatSetValues() operation.
237127a8da17SBarry Smith 
237227a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
2373d1be2dadSMatthew Knepley   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
237427a8da17SBarry Smith 
237527a8da17SBarry Smith     MUST be called before any calls to MatSetValues();
237627a8da17SBarry Smith 
237727a8da17SBarry Smith @*/
23787087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
237927a8da17SBarry Smith {
23804ac538c5SBarry Smith   PetscErrorCode ierr;
238127a8da17SBarry Smith 
238227a8da17SBarry Smith   PetscFunctionBegin;
23830700a824SBarry Smith   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
23844482741eSBarry Smith   PetscValidPointer(indices,2);
23854ac538c5SBarry Smith   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
238627a8da17SBarry Smith   PetscFunctionReturn(0);
238727a8da17SBarry Smith }
238827a8da17SBarry Smith 
23894a2ae208SSatish Balay #undef __FUNCT__
2390985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2391985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2392273d9f13SBarry Smith {
2393273d9f13SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2394dfbe8321SBarry Smith   PetscErrorCode ierr;
2395c1ac3661SBarry Smith   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2396273d9f13SBarry Smith   PetscReal      atmp;
239787828ca2SBarry Smith   PetscScalar    *x,zero = 0.0;
2398273d9f13SBarry Smith   MatScalar      *aa;
2399c1ac3661SBarry Smith   PetscInt       ncols,brow,krow,kcol;
2400273d9f13SBarry Smith 
2401273d9f13SBarry Smith   PetscFunctionBegin;
2402e32f2f54SBarry Smith   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2403d0f46423SBarry Smith   bs  = A->rmap->bs;
2404273d9f13SBarry Smith   aa  = a->a;
2405273d9f13SBarry Smith   ai  = a->i;
2406273d9f13SBarry Smith   aj  = a->j;
2407273d9f13SBarry Smith   mbs = a->mbs;
2408273d9f13SBarry Smith 
24092dcb1b2aSMatthew Knepley   ierr = VecSet(v,zero);CHKERRQ(ierr);
24101ebc52fbSHong Zhang   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2411273d9f13SBarry Smith   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2412e32f2f54SBarry Smith   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2413273d9f13SBarry Smith   for (i=0; i<mbs; i++) {
2414273d9f13SBarry Smith     ncols = ai[1] - ai[0]; ai++;
2415273d9f13SBarry Smith     brow  = bs*i;
2416273d9f13SBarry Smith     for (j=0; j<ncols; j++) {
2417273d9f13SBarry Smith       for (kcol=0; kcol<bs; kcol++) {
2418273d9f13SBarry Smith         for (krow=0; krow<bs; krow++) {
2419273d9f13SBarry Smith           atmp = PetscAbsScalar(*aa);aa++;
2420273d9f13SBarry Smith           row  = brow + krow;   /* row index */
2421a83599f4SBarry Smith           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2422985db425SBarry Smith           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2423273d9f13SBarry Smith         }
2424273d9f13SBarry Smith       }
2425273d9f13SBarry Smith       aj++;
2426273d9f13SBarry Smith     }
2427273d9f13SBarry Smith   }
24281ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2429273d9f13SBarry Smith   PetscFunctionReturn(0);
2430273d9f13SBarry Smith }
2431273d9f13SBarry Smith 
24324a2ae208SSatish Balay #undef __FUNCT__
24333c896bc6SHong Zhang #define __FUNCT__ "MatCopy_SeqBAIJ"
24343c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
24353c896bc6SHong Zhang {
24363c896bc6SHong Zhang   PetscErrorCode ierr;
24373c896bc6SHong Zhang 
24383c896bc6SHong Zhang   PetscFunctionBegin;
24393c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
24403c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
24413c896bc6SHong Zhang     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
24423c896bc6SHong Zhang     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2443d88c0aacSHong Zhang     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
24443c896bc6SHong Zhang 
2445d88c0aacSHong Zhang     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2446d88c0aacSHong Zhang     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2447d88c0aacSHong Zhang     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
24483c896bc6SHong Zhang   } else {
24493c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
24503c896bc6SHong Zhang   }
24513c896bc6SHong Zhang   PetscFunctionReturn(0);
24523c896bc6SHong Zhang }
24533c896bc6SHong Zhang 
24543c896bc6SHong Zhang #undef __FUNCT__
24554994cf47SJed Brown #define __FUNCT__ "MatSetUp_SeqBAIJ"
24564994cf47SJed Brown PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2457273d9f13SBarry Smith {
2458dfbe8321SBarry Smith   PetscErrorCode ierr;
2459273d9f13SBarry Smith 
2460273d9f13SBarry Smith   PetscFunctionBegin;
2461535b19f3SBarry Smith   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2462273d9f13SBarry Smith   PetscFunctionReturn(0);
2463273d9f13SBarry Smith }
2464273d9f13SBarry Smith 
24654a2ae208SSatish Balay #undef __FUNCT__
24668c778c55SBarry Smith #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
24678c778c55SBarry Smith PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2468f2a5309cSSatish Balay {
2469f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
24706e111a19SKarl Rupp 
2471f2a5309cSSatish Balay   PetscFunctionBegin;
2472f2a5309cSSatish Balay   *array = a->a;
2473f2a5309cSSatish Balay   PetscFunctionReturn(0);
2474f2a5309cSSatish Balay }
2475f2a5309cSSatish Balay 
24764a2ae208SSatish Balay #undef __FUNCT__
24778c778c55SBarry Smith #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
24788c778c55SBarry Smith PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2479f2a5309cSSatish Balay {
2480f2a5309cSSatish Balay   PetscFunctionBegin;
2481f2a5309cSSatish Balay   PetscFunctionReturn(0);
2482f2a5309cSSatish Balay }
2483f2a5309cSSatish Balay 
248442ee4b1aSHong Zhang #undef __FUNCT__
248542ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ"
2486f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
248742ee4b1aSHong Zhang {
248842ee4b1aSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2489dfbe8321SBarry Smith   PetscErrorCode ierr;
2490e838b9e7SJed Brown   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2491e838b9e7SJed Brown   PetscBLASInt   one=1;
249242ee4b1aSHong Zhang 
249342ee4b1aSHong Zhang   PetscFunctionBegin;
249442ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2495f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2496c5df96a5SBarry Smith     PetscBLASInt bnz;
2497c5df96a5SBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2498a83cb05cSBarry Smith     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2499c537a176SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2500c4319e64SHong Zhang     if (y->xtoy && y->XtoY != X) {
2501c4319e64SHong Zhang       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
25026bf464f9SBarry Smith       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2503c537a176SHong Zhang     }
2504c4319e64SHong Zhang     if (!y->xtoy) { /* get xtoy */
25050298fd71SBarry Smith       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2506c4319e64SHong Zhang       y->XtoY = X;
2507c009d632SSatish Balay       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2508c537a176SHong Zhang     }
2509c537a176SHong Zhang     for (i=0; i<x->nz; i++) {
2510c4319e64SHong Zhang       j = 0;
2511c4319e64SHong Zhang       while (j < bs2) {
2512f4df32b1SMatthew Knepley         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2513c4319e64SHong Zhang         j++;
2514c537a176SHong Zhang       }
2515c4319e64SHong Zhang     }
25161e2582c4SBarry Smith     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
251742ee4b1aSHong Zhang   } else {
2518f4df32b1SMatthew Knepley     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
251942ee4b1aSHong Zhang   }
252042ee4b1aSHong Zhang   PetscFunctionReturn(0);
252142ee4b1aSHong Zhang }
252242ee4b1aSHong Zhang 
252399cafbc1SBarry Smith #undef __FUNCT__
252499cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_SeqBAIJ"
252599cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
252699cafbc1SBarry Smith {
252799cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
252899cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2529dd6ea824SBarry Smith   MatScalar   *aa = a->a;
253099cafbc1SBarry Smith 
253199cafbc1SBarry Smith   PetscFunctionBegin;
253299cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
253399cafbc1SBarry Smith   PetscFunctionReturn(0);
253499cafbc1SBarry Smith }
253599cafbc1SBarry Smith 
253699cafbc1SBarry Smith #undef __FUNCT__
253799cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
253899cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
253999cafbc1SBarry Smith {
254099cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
254199cafbc1SBarry Smith   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2542dd6ea824SBarry Smith   MatScalar   *aa = a->a;
254399cafbc1SBarry Smith 
254499cafbc1SBarry Smith   PetscFunctionBegin;
254599cafbc1SBarry Smith   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
254699cafbc1SBarry Smith   PetscFunctionReturn(0);
254799cafbc1SBarry Smith }
254899cafbc1SBarry Smith 
25493acb8795SBarry Smith extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
25503acb8795SBarry Smith 
25513acb8795SBarry Smith #undef __FUNCT__
25523acb8795SBarry Smith #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
25533acb8795SBarry Smith /*
25543acb8795SBarry Smith     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
25553acb8795SBarry Smith */
25561a83f524SJed Brown PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25573acb8795SBarry Smith {
25583acb8795SBarry Smith   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
25593acb8795SBarry Smith   PetscErrorCode ierr;
25603acb8795SBarry Smith   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
25613acb8795SBarry Smith   PetscInt       nz = a->i[m],row,*jj,mr,col;
25623acb8795SBarry Smith 
25633acb8795SBarry Smith   PetscFunctionBegin;
25643acb8795SBarry Smith   *nn = n;
25653acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
2566e7e72b3dSBarry Smith   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2567e7e72b3dSBarry Smith   else {
25683acb8795SBarry Smith     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
25693acb8795SBarry Smith     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
25703acb8795SBarry Smith     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
25713acb8795SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
25723acb8795SBarry Smith     jj   = a->j;
25733acb8795SBarry Smith     for (i=0; i<nz; i++) {
25743acb8795SBarry Smith       collengths[jj[i]]++;
25753acb8795SBarry Smith     }
25763acb8795SBarry Smith     cia[0] = oshift;
25773acb8795SBarry Smith     for (i=0; i<n; i++) {
25783acb8795SBarry Smith       cia[i+1] = cia[i] + collengths[i];
25793acb8795SBarry Smith     }
25803acb8795SBarry Smith     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
25813acb8795SBarry Smith     jj   = a->j;
25823acb8795SBarry Smith     for (row=0; row<m; row++) {
25833acb8795SBarry Smith       mr = a->i[row+1] - a->i[row];
25843acb8795SBarry Smith       for (i=0; i<mr; i++) {
25853acb8795SBarry Smith         col = *jj++;
258626fbe8dcSKarl Rupp 
25873acb8795SBarry Smith         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
25883acb8795SBarry Smith       }
25893acb8795SBarry Smith     }
25903acb8795SBarry Smith     ierr = PetscFree(collengths);CHKERRQ(ierr);
25913acb8795SBarry Smith     *ia  = cia; *ja = cja;
25923acb8795SBarry Smith   }
25933acb8795SBarry Smith   PetscFunctionReturn(0);
25943acb8795SBarry Smith }
25953acb8795SBarry Smith 
25963acb8795SBarry Smith #undef __FUNCT__
25973acb8795SBarry Smith #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
25981a83f524SJed Brown PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
25993acb8795SBarry Smith {
26003acb8795SBarry Smith   PetscErrorCode ierr;
26013acb8795SBarry Smith 
26023acb8795SBarry Smith   PetscFunctionBegin;
26033acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
26043acb8795SBarry Smith   ierr = PetscFree(*ia);CHKERRQ(ierr);
26053acb8795SBarry Smith   ierr = PetscFree(*ja);CHKERRQ(ierr);
26063acb8795SBarry Smith   PetscFunctionReturn(0);
26073acb8795SBarry Smith }
26083acb8795SBarry Smith 
2609f6d58c54SBarry Smith #undef __FUNCT__
2610f6d58c54SBarry Smith #define __FUNCT__ "MatFDColoringApply_BAIJ"
26117087cfbeSBarry Smith PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2612f6d58c54SBarry Smith {
2613f6d58c54SBarry Smith   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f;
2614f6d58c54SBarry Smith   PetscErrorCode ierr;
26154e269d77SPeter Brune   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2616f6d58c54SBarry Smith   PetscScalar    dx,*y,*xx,*w3_array;
2617f6d58c54SBarry Smith   PetscScalar    *vscale_array;
2618f6d58c54SBarry Smith   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2619f6d58c54SBarry Smith   Vec            w1      = coloring->w1,w2=coloring->w2,w3;
2620f6d58c54SBarry Smith   void           *fctx   = coloring->fctx;
2621ace3abfcSBarry Smith   PetscBool      flg     = PETSC_FALSE;
2622f6d58c54SBarry Smith   PetscInt       ctype   = coloring->ctype,N,col_start=0,col_end=0;
2623f6d58c54SBarry Smith   Vec            x1_tmp;
2624f6d58c54SBarry Smith 
2625f6d58c54SBarry Smith   PetscFunctionBegin;
26260700a824SBarry Smith   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
26270700a824SBarry Smith   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
26280700a824SBarry Smith   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2629e32f2f54SBarry Smith   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2630f6d58c54SBarry Smith 
2631f6d58c54SBarry Smith   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2632f6d58c54SBarry Smith   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
26330298fd71SBarry Smith   ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr);
2634f6d58c54SBarry Smith   if (flg) {
2635f6d58c54SBarry Smith     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2636f6d58c54SBarry Smith   } else {
2637ace3abfcSBarry Smith     PetscBool assembled;
2638f6d58c54SBarry Smith     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2639f6d58c54SBarry Smith     if (assembled) {
2640f6d58c54SBarry Smith       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2641f6d58c54SBarry Smith     }
2642f6d58c54SBarry Smith   }
2643f6d58c54SBarry Smith 
2644f6d58c54SBarry Smith   x1_tmp = x1;
2645f6d58c54SBarry Smith   if (!coloring->vscale) {
2646f6d58c54SBarry Smith     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2647f6d58c54SBarry Smith   }
2648f6d58c54SBarry Smith 
2649f6d58c54SBarry Smith   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2650f6d58c54SBarry Smith     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2651f6d58c54SBarry Smith   }
2652f6d58c54SBarry Smith   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2653f6d58c54SBarry Smith 
2654f6d58c54SBarry Smith   /* Set w1 = F(x1) */
26554e269d77SPeter Brune   if (!coloring->fset) {
2656f6d58c54SBarry Smith     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2657f6d58c54SBarry Smith     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2658f6d58c54SBarry Smith     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
26594e269d77SPeter Brune   } else {
26604e269d77SPeter Brune     coloring->fset = PETSC_FALSE;
2661f6d58c54SBarry Smith   }
2662f6d58c54SBarry Smith 
2663f6d58c54SBarry Smith   if (!coloring->w3) {
2664f6d58c54SBarry Smith     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2665f6d58c54SBarry Smith     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2666f6d58c54SBarry Smith   }
2667f6d58c54SBarry Smith   w3 = coloring->w3;
2668f6d58c54SBarry Smith 
2669f6d58c54SBarry Smith   /* Compute all the local scale factors, including ghost points */
2670f6d58c54SBarry Smith   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2671f6d58c54SBarry Smith   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2672f6d58c54SBarry Smith   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2673f6d58c54SBarry Smith   if (ctype == IS_COLORING_GHOSTED) {
2674f6d58c54SBarry Smith     col_start = 0; col_end = N;
2675f6d58c54SBarry Smith   } else if (ctype == IS_COLORING_GLOBAL) {
2676f6d58c54SBarry Smith     xx           = xx - start;
2677f6d58c54SBarry Smith     vscale_array = vscale_array - start;
2678f6d58c54SBarry Smith     col_start    = start; col_end = N + start;
267926fbe8dcSKarl Rupp   }
2680f6d58c54SBarry Smith   for (col=col_start; col<col_end; col++) {
2681f6d58c54SBarry Smith     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2682f6d58c54SBarry Smith     if (coloring->htype[0] == 'w') {
2683f6d58c54SBarry Smith       dx = 1.0 + unorm;
2684f6d58c54SBarry Smith     } else {
2685f6d58c54SBarry Smith       dx = xx[col];
2686f6d58c54SBarry Smith     }
2687d4a378daSJed Brown     if (dx == (PetscScalar)0.0) dx = 1.0;
2688f6d58c54SBarry Smith     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2689f6d58c54SBarry Smith     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2690f6d58c54SBarry Smith     dx               *= epsilon;
2691d4a378daSJed Brown     vscale_array[col] = (PetscScalar)1.0/dx;
2692365a8a9eSBarry Smith   }
2693f6d58c54SBarry Smith   if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2694f6d58c54SBarry Smith   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2695f6d58c54SBarry Smith   if (ctype == IS_COLORING_GLOBAL) {
2696f6d58c54SBarry Smith     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697f6d58c54SBarry Smith     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2698f6d58c54SBarry Smith   }
2699f6d58c54SBarry Smith   if (coloring->vscaleforrow) {
2700f6d58c54SBarry Smith     vscaleforrow = coloring->vscaleforrow;
2701e7e72b3dSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2702f6d58c54SBarry Smith 
2703f6d58c54SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2704f6d58c54SBarry Smith   /*
2705f6d58c54SBarry Smith     Loop over each color
2706f6d58c54SBarry Smith   */
2707f6d58c54SBarry Smith   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2708f6d58c54SBarry Smith   for (k=0; k<coloring->ncolors; k++) {
2709f6d58c54SBarry Smith     coloring->currentcolor = k;
2710f6d58c54SBarry Smith     for (i=0; i<bs; i++) {
2711f6d58c54SBarry Smith       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2712f6d58c54SBarry Smith       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2713f6d58c54SBarry Smith       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2714f6d58c54SBarry Smith       /*
2715f6d58c54SBarry Smith         Loop over each column associated with color
2716f6d58c54SBarry Smith         adding the perturbation to the vector w3.
2717f6d58c54SBarry Smith       */
2718f6d58c54SBarry Smith       for (l=0; l<coloring->ncolumns[k]; l++) {
2719f6d58c54SBarry Smith         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2720f6d58c54SBarry Smith         if (coloring->htype[0] == 'w') {
2721f6d58c54SBarry Smith           dx = 1.0 + unorm;
2722f6d58c54SBarry Smith         } else {
2723f6d58c54SBarry Smith           dx = xx[col];
2724f6d58c54SBarry Smith         }
2725d4a378daSJed Brown         if (dx == (PetscScalar)0.0) dx = 1.0;
2726f6d58c54SBarry Smith         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2727f6d58c54SBarry Smith         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2728f6d58c54SBarry Smith         dx *= epsilon;
2729e32f2f54SBarry Smith         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2730f6d58c54SBarry Smith         w3_array[col] += dx;
2731f6d58c54SBarry Smith       }
2732f6d58c54SBarry Smith       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2733f6d58c54SBarry Smith       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2734f6d58c54SBarry Smith 
2735f6d58c54SBarry Smith       /*
2736f6d58c54SBarry Smith         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2737f6d58c54SBarry Smith         w2 = F(x1 + dx) - F(x1)
2738f6d58c54SBarry Smith       */
2739f6d58c54SBarry Smith       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2740f6d58c54SBarry Smith       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2741f6d58c54SBarry Smith       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2742f6d58c54SBarry Smith       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2743f6d58c54SBarry Smith 
2744f6d58c54SBarry Smith       /*
2745f6d58c54SBarry Smith         Loop over rows of vector, putting results into Jacobian matrix
2746f6d58c54SBarry Smith       */
2747f6d58c54SBarry Smith       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2748f6d58c54SBarry Smith       for (l=0; l<coloring->nrows[k]; l++) {
2749f6d58c54SBarry Smith         row = bs*coloring->rows[k][l];                /* local row index */
2750f6d58c54SBarry Smith         col = i + bs*coloring->columnsforrow[k][l];       /* global column index */
2751f6d58c54SBarry Smith         for (j=0; j<bs; j++) {
2752f6d58c54SBarry Smith           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2753f6d58c54SBarry Smith           srows[j]  = row + start + j;
2754f6d58c54SBarry Smith         }
2755f6d58c54SBarry Smith         ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2756f6d58c54SBarry Smith       }
2757f6d58c54SBarry Smith       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2758f6d58c54SBarry Smith     }
2759f6d58c54SBarry Smith   } /* endof for each color */
2760f6d58c54SBarry Smith   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2761f6d58c54SBarry Smith   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2762f6d58c54SBarry Smith   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2763f6d58c54SBarry Smith   ierr = PetscFree(srows);CHKERRQ(ierr);
2764f6d58c54SBarry Smith 
2765f6d58c54SBarry Smith   coloring->currentcolor = -1;
276626fbe8dcSKarl Rupp 
2767f6d58c54SBarry Smith   ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768f6d58c54SBarry Smith   ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769f6d58c54SBarry Smith   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2770f6d58c54SBarry Smith   PetscFunctionReturn(0);
2771f6d58c54SBarry Smith }
277299cafbc1SBarry Smith 
27732593348eSBarry Smith /* -------------------------------------------------------------------*/
/*
   MatOps_Values - function table for the SeqBAIJ matrix type, filled positionally.

   The order of the entries is significant: each position corresponds to one
   slot of struct _MatOps (declared outside this section of the file), and the
   existing slot markers label every fifth position.  A 0 entry leaves the
   slot null; presumably that means the operation is not supplied by this
   type - confirm against the struct _MatOps declaration.

   NOTE(review): MatSeqBAIJSetPreallocation_SeqBAIJ() assigns B->ops->mult,
   B->ops->multadd and B->ops->sor on the matrix, so the MatMult_SeqBAIJ_N and
   MatMultAdd_SeqBAIJ_N entries here act as initial values only, assuming this
   table is what gets copied into B->ops - confirm.
   Position 54 holds MatFDColoringCreate_SeqAIJ, i.e. a SeqAIJ-named routine
   rather than a BAIJ-specific one.
*/
27743964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2775cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2776cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2777cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
277897304618SKris Buschelman                                /* 4*/  MatMultAdd_SeqBAIJ_N,
27797c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
27807c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2781db4efbfdSBarry Smith                                        0,
2782cc2dc46cSBarry Smith                                        0,
2783cc2dc46cSBarry Smith                                        0,
278497304618SKris Buschelman                                /* 10*/ 0,
2785cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2786cc2dc46cSBarry Smith                                        0,
2787b6490206SBarry Smith                                        0,
2788f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
278997304618SKris Buschelman                                /* 15*/ MatGetInfo_SeqBAIJ,
2790cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2791cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2792cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2793cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
279497304618SKris Buschelman                                /* 20*/ 0,
2795cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2796cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2797cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2798d519adbfSMatthew Knepley                                /* 24*/ MatZeroRows_SeqBAIJ,
2799db4efbfdSBarry Smith                                        0,
2800db4efbfdSBarry Smith                                        0,
2801db4efbfdSBarry Smith                                        0,
2802db4efbfdSBarry Smith                                        0,
28034994cf47SJed Brown                                /* 29*/ MatSetUp_SeqBAIJ,
2804db4efbfdSBarry Smith                                        0,
2805db4efbfdSBarry Smith                                        0,
28068c778c55SBarry Smith                                        0,
28078c778c55SBarry Smith                                        0,
2808d519adbfSMatthew Knepley                                /* 34*/ MatDuplicate_SeqBAIJ,
2809cc2dc46cSBarry Smith                                        0,
2810cc2dc46cSBarry Smith                                        0,
2811cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2812cc2dc46cSBarry Smith                                        0,
2813d519adbfSMatthew Knepley                                /* 39*/ MatAXPY_SeqBAIJ,
2814cc2dc46cSBarry Smith                                        MatGetSubMatrices_SeqBAIJ,
2815cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2816cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
28173c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2818d519adbfSMatthew Knepley                                /* 44*/ 0,
2819cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
2820cc2dc46cSBarry Smith                                        0,
2821cc2dc46cSBarry Smith                                        0,
282297b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2823f73d5cc4SBarry Smith                                /* 49*/ 0,
28243b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
282592c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
28263acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
28273acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
28283acb8795SBarry Smith                                /* 54*/ MatFDColoringCreate_SeqAIJ,
2829cc2dc46cSBarry Smith                                        0,
2830cc2dc46cSBarry Smith                                        0,
2831cc2dc46cSBarry Smith                                        0,
2832d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
2833d519adbfSMatthew Knepley                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2834b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
2835b9b97703SBarry Smith                                        MatView_SeqBAIJ,
2836357abbc8SBarry Smith                                        0,
2837273d9f13SBarry Smith                                        0,
2838d519adbfSMatthew Knepley                                /* 64*/ 0,
2839273d9f13SBarry Smith                                        0,
2840273d9f13SBarry Smith                                        0,
2841273d9f13SBarry Smith                                        0,
2842273d9f13SBarry Smith                                        0,
2843d519adbfSMatthew Knepley                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2844273d9f13SBarry Smith                                        0,
2845c87e5d42SMatthew Knepley                                        MatConvert_Basic,
284697304618SKris Buschelman                                        0,
284797304618SKris Buschelman                                        0,
2848d519adbfSMatthew Knepley                                /* 74*/ 0,
2849f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
285097304618SKris Buschelman                                        0,
285197304618SKris Buschelman                                        0,
285297304618SKris Buschelman                                        0,
2853d519adbfSMatthew Knepley                                /* 79*/ 0,
285497304618SKris Buschelman                                        0,
285597304618SKris Buschelman                                        0,
285697304618SKris Buschelman                                        0,
28575bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
2858d519adbfSMatthew Knepley                                /* 84*/ 0,
2859b01c7715SBarry Smith                                        0,
2860b01c7715SBarry Smith                                        0,
2861b01c7715SBarry Smith                                        0,
2862865e5f61SKris Buschelman                                        0,
2863d519adbfSMatthew Knepley                                /* 89*/ 0,
2864865e5f61SKris Buschelman                                        0,
2865865e5f61SKris Buschelman                                        0,
2866865e5f61SKris Buschelman                                        0,
2867865e5f61SKris Buschelman                                        0,
2868d519adbfSMatthew Knepley                                /* 94*/ 0,
2869865e5f61SKris Buschelman                                        0,
2870865e5f61SKris Buschelman                                        0,
287199cafbc1SBarry Smith                                        0,
287299cafbc1SBarry Smith                                        0,
2873d519adbfSMatthew Knepley                                /* 99*/ 0,
287499cafbc1SBarry Smith                                        0,
287599cafbc1SBarry Smith                                        0,
287699cafbc1SBarry Smith                                        0,
287799cafbc1SBarry Smith                                        0,
2878d519adbfSMatthew Knepley                                /*104*/ 0,
287999cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
28802af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
28812af78befSBarry Smith                                        0,
28822af78befSBarry Smith                                        0,
2883d519adbfSMatthew Knepley                                /*109*/ 0,
28842af78befSBarry Smith                                        0,
28852af78befSBarry Smith                                        0,
28862af78befSBarry Smith                                        0,
2887547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
2888547795f9SHong Zhang                                /*114*/ 0,
2889547795f9SHong Zhang                                        0,
2890547795f9SHong Zhang                                        0,
2891547795f9SHong Zhang                                        0,
2892547795f9SHong Zhang                                        0,
2893547795f9SHong Zhang                                /*119*/ 0,
2894547795f9SHong Zhang                                        0,
2895547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
2896d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2897d6037b41SHong Zhang                                        0,
2898bbead8a2SBarry Smith                                /*124*/ 0,
2899bbead8a2SBarry Smith                                        0,
29003964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
29013964eb88SJed Brown                                        0,
29023964eb88SJed Brown                                        0,
29033964eb88SJed Brown                                /*129*/ 0,
29043964eb88SJed Brown                                        0,
29053964eb88SJed Brown                                        0,
29063964eb88SJed Brown                                        0,
29073964eb88SJed Brown                                        0,
29083964eb88SJed Brown                                /*134*/ 0,
29093964eb88SJed Brown                                        0,
29103964eb88SJed Brown                                        0,
29113964eb88SJed Brown                                        0,
29123964eb88SJed Brown                                        0,
29133964eb88SJed Brown                                /*139*/ 0,
29143964eb88SJed Brown                                        0
291599cafbc1SBarry Smith };
29162593348eSBarry Smith 
29174a2ae208SSatish Balay #undef __FUNCT__
29184a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ"
29197087cfbeSBarry Smith PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
29203e90b805SBarry Smith {
29213e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
29228ece6314SShri Abhyankar   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2923dfbe8321SBarry Smith   PetscErrorCode ierr;
29243e90b805SBarry Smith 
29253e90b805SBarry Smith   PetscFunctionBegin;
2926e7e72b3dSBarry Smith   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
29273e90b805SBarry Smith 
29283e90b805SBarry Smith   /* allocate space for values if not already there */
29293e90b805SBarry Smith   if (!aij->saved_values) {
293087828ca2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
29311784c0f5SBarry Smith     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
29323e90b805SBarry Smith   }
29333e90b805SBarry Smith 
29343e90b805SBarry Smith   /* copy values over */
293587828ca2SBarry Smith   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
29363e90b805SBarry Smith   PetscFunctionReturn(0);
29373e90b805SBarry Smith }
29383e90b805SBarry Smith 
29394a2ae208SSatish Balay #undef __FUNCT__
29404a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
29417087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
29423e90b805SBarry Smith {
29433e90b805SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
29446849ba73SBarry Smith   PetscErrorCode ierr;
29458ece6314SShri Abhyankar   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
29463e90b805SBarry Smith 
29473e90b805SBarry Smith   PetscFunctionBegin;
2948e7e72b3dSBarry Smith   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2949e7e72b3dSBarry Smith   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
29503e90b805SBarry Smith 
29513e90b805SBarry Smith   /* copy values over */
295287828ca2SBarry Smith   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
29533e90b805SBarry Smith   PetscFunctionReturn(0);
29543e90b805SBarry Smith }
29553e90b805SBarry Smith 
29568cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
29578cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2958273d9f13SBarry Smith 
29594a2ae208SSatish Balay #undef __FUNCT__
2960a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
29617087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2962a23d5eceSKris Buschelman {
2963a23d5eceSKris Buschelman   Mat_SeqBAIJ    *b;
29646849ba73SBarry Smith   PetscErrorCode ierr;
2965535b19f3SBarry Smith   PetscInt       i,mbs,nbs,bs2;
29662576faa2SJed Brown   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2967a23d5eceSKris Buschelman 
2968a23d5eceSKris Buschelman   PetscFunctionBegin;
29692576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2970ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
2971ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
2972ab93d7beSBarry Smith     nz             = 0;
2973ab93d7beSBarry Smith   }
29748c07d4e3SBarry Smith 
297526283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
297626283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
297726283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
297826283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2979e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2980899cda47SBarry Smith 
2981899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
2982899cda47SBarry Smith 
2983d0f46423SBarry Smith   mbs = B->rmap->n/bs;
2984d0f46423SBarry Smith   nbs = B->cmap->n/bs;
2985a23d5eceSKris Buschelman   bs2 = bs*bs;
2986a23d5eceSKris Buschelman 
298765e19b50SBarry Smith   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2988a23d5eceSKris Buschelman 
2989a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2990e32f2f54SBarry Smith   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2991a23d5eceSKris Buschelman   if (nnz) {
2992a23d5eceSKris Buschelman     for (i=0; i<mbs; i++) {
2993e32f2f54SBarry Smith       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2994e32f2f54SBarry Smith       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2995a23d5eceSKris Buschelman     }
2996a23d5eceSKris Buschelman   }
2997a23d5eceSKris Buschelman 
2998a23d5eceSKris Buschelman   b    = (Mat_SeqBAIJ*)B->data;
2999ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
30000298fd71SBarry Smith   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
30018c07d4e3SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
30028c07d4e3SBarry Smith 
3003a23d5eceSKris Buschelman   if (!flg) {
3004a23d5eceSKris Buschelman     switch (bs) {
3005a23d5eceSKris Buschelman     case 1:
3006a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3007a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3008a23d5eceSKris Buschelman       break;
3009a23d5eceSKris Buschelman     case 2:
3010a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3011a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3012a23d5eceSKris Buschelman       break;
3013a23d5eceSKris Buschelman     case 3:
3014a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3015a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3016a23d5eceSKris Buschelman       break;
3017a23d5eceSKris Buschelman     case 4:
3018a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3019a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3020a23d5eceSKris Buschelman       break;
3021a23d5eceSKris Buschelman     case 5:
3022a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3023a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3024a23d5eceSKris Buschelman       break;
3025a23d5eceSKris Buschelman     case 6:
3026a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3027a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3028a23d5eceSKris Buschelman       break;
3029a23d5eceSKris Buschelman     case 7:
3030a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3031a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3032a23d5eceSKris Buschelman       break;
30338ab949d8SShri Abhyankar     case 15:
3034832cc040SShri Abhyankar       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
3035de80f912SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
30368ab949d8SShri Abhyankar       break;
3037a23d5eceSKris Buschelman     default:
3038a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_N;
3039a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3040a23d5eceSKris Buschelman       break;
3041a23d5eceSKris Buschelman     }
3042a23d5eceSKris Buschelman   }
3043*e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3044a23d5eceSKris Buschelman   b->mbs = mbs;
3045a23d5eceSKris Buschelman   b->nbs = nbs;
3046ab93d7beSBarry Smith   if (!skipallocation) {
30472ee49352SLisandro Dalcin     if (!b->imax) {
3048ab93d7beSBarry Smith       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3049a2ea699eSBarry Smith       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
305026fbe8dcSKarl Rupp 
30514fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
30522ee49352SLisandro Dalcin     }
3053ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
305426fbe8dcSKarl Rupp     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3055a23d5eceSKris Buschelman     if (!nnz) {
3056a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3057c62bd62aSJed Brown       else if (nz < 0) nz = 1;
3058a23d5eceSKris Buschelman       for (i=0; i<mbs; i++) b->imax[i] = nz;
3059a23d5eceSKris Buschelman       nz = nz*mbs;
3060a23d5eceSKris Buschelman     } else {
3061a23d5eceSKris Buschelman       nz = 0;
3062a23d5eceSKris Buschelman       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3063a23d5eceSKris Buschelman     }
3064a23d5eceSKris Buschelman 
3065a23d5eceSKris Buschelman     /* allocate the matrix space */
30662ee49352SLisandro Dalcin     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3067d0f46423SBarry Smith     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3068d0f46423SBarry Smith     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3069a23d5eceSKris Buschelman     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3070c1ac3661SBarry Smith     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
307126fbe8dcSKarl Rupp 
3072a23d5eceSKris Buschelman     b->singlemalloc = PETSC_TRUE;
3073a23d5eceSKris Buschelman     b->i[0]         = 0;
3074a23d5eceSKris Buschelman     for (i=1; i<mbs+1; i++) {
3075a23d5eceSKris Buschelman       b->i[i] = b->i[i-1] + b->imax[i-1];
3076a23d5eceSKris Buschelman     }
3077e6b907acSBarry Smith     b->free_a  = PETSC_TRUE;
3078e6b907acSBarry Smith     b->free_ij = PETSC_TRUE;
3079e811da20SHong Zhang   } else {
3080e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3081e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3082ab93d7beSBarry Smith   }
3083a23d5eceSKris Buschelman 
3084a23d5eceSKris Buschelman   b->bs2              = bs2;
3085a23d5eceSKris Buschelman   b->mbs              = mbs;
3086a23d5eceSKris Buschelman   b->nz               = 0;
3087b32cb4a7SJed Brown   b->maxnz            = nz;
3088b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
30892576faa2SJed Brown   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3090a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3091a23d5eceSKris Buschelman }
3092a23d5eceSKris Buschelman 
3093725b52f3SLisandro Dalcin #undef __FUNCT__
3094725b52f3SLisandro Dalcin #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3095cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3096725b52f3SLisandro Dalcin {
3097725b52f3SLisandro Dalcin   PetscInt       i,m,nz,nz_max=0,*nnz;
3098725b52f3SLisandro Dalcin   PetscScalar    *values=0;
3099725b52f3SLisandro Dalcin   PetscErrorCode ierr;
3100725b52f3SLisandro Dalcin 
3101725b52f3SLisandro Dalcin   PetscFunctionBegin;
3102e32f2f54SBarry Smith   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
310326283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
310426283091SBarry Smith   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
310526283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
310626283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3107e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3108d0f46423SBarry Smith   m    = B->rmap->n/bs;
3109725b52f3SLisandro Dalcin 
311026fbe8dcSKarl Rupp   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3111725b52f3SLisandro Dalcin   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3112725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3113cf12db73SBarry Smith     nz = ii[i+1]- ii[i];
311426fbe8dcSKarl Rupp     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3115725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3116725b52f3SLisandro Dalcin     nnz[i] = nz;
3117725b52f3SLisandro Dalcin   }
3118725b52f3SLisandro Dalcin   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3119725b52f3SLisandro Dalcin   ierr = PetscFree(nnz);CHKERRQ(ierr);
3120725b52f3SLisandro Dalcin 
3121725b52f3SLisandro Dalcin   values = (PetscScalar*)V;
3122725b52f3SLisandro Dalcin   if (!values) {
3123725b52f3SLisandro Dalcin     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3124725b52f3SLisandro Dalcin     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3125725b52f3SLisandro Dalcin   }
3126725b52f3SLisandro Dalcin   for (i=0; i<m; i++) {
3127cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
3128cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
3129cf12db73SBarry Smith     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3130725b52f3SLisandro Dalcin     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3131725b52f3SLisandro Dalcin   }
3132725b52f3SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3133725b52f3SLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134725b52f3SLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31357827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3136725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3137725b52f3SLisandro Dalcin }
3138725b52f3SLisandro Dalcin 
31398cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
31408cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
314167877ebaSShri Abhyankar #if defined(PETSC_HAVE_MUMPS)
31428cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
314367877ebaSShri Abhyankar #endif
3144b2573a8aSBarry Smith extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
3145b24902e0SBarry Smith 
31460bad9183SKris Buschelman /*MC
3147fafad747SKris Buschelman    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
31480bad9183SKris Buschelman    block sparse compressed row format.
31490bad9183SKris Buschelman 
31500bad9183SKris Buschelman    Options Database Keys:
31510bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
31520bad9183SKris Buschelman 
31530bad9183SKris Buschelman   Level: beginner
31540bad9183SKris Buschelman 
3155f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ()
31560bad9183SKris Buschelman M*/
31570bad9183SKris Buschelman 
31588cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3159b24902e0SBarry Smith 
3160a23d5eceSKris Buschelman #undef __FUNCT__
31614a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ"
31628cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
31632593348eSBarry Smith {
3164dfbe8321SBarry Smith   PetscErrorCode ierr;
3165c1ac3661SBarry Smith   PetscMPIInt    size;
3166b6490206SBarry Smith   Mat_SeqBAIJ    *b;
31673b2fbd54SBarry Smith 
31683a40ed3dSBarry Smith   PetscFunctionBegin;
3169ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3170e32f2f54SBarry Smith   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3171b6490206SBarry Smith 
317238f2d2fdSLisandro Dalcin   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3173b0a32e0cSBarry Smith   B->data = (void*)b;
3174549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
317526fbe8dcSKarl Rupp 
31762593348eSBarry Smith   b->row          = 0;
31772593348eSBarry Smith   b->col          = 0;
3178e51c0b9cSSatish Balay   b->icol         = 0;
31792593348eSBarry Smith   b->reallocs     = 0;
31803e90b805SBarry Smith   b->saved_values = 0;
31812593348eSBarry Smith 
3182c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
31832593348eSBarry Smith   b->nonew              = 0;
31842593348eSBarry Smith   b->diag               = 0;
31852593348eSBarry Smith   b->solve_work         = 0;
3186de6a44a3SBarry Smith   b->mult_work          = 0;
31872a1b7f2aSHong Zhang   B->spptr              = 0;
3188b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3189a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
3190c4319e64SHong Zhang   b->xtoy               = 0;
3191c4319e64SHong Zhang   b->XtoY               = 0;
319288e51ccdSHong Zhang   B->same_nonzero       = PETSC_FALSE;
31934e220ebcSLois Curfman McInnes 
319400de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C","MatGetFactorAvailable_seqbaij_petsc",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
319500de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C","MatGetFactor_seqbaij_petsc",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
319600de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C","MatGetFactor_seqbaij_bstrm",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
319767877ebaSShri Abhyankar #if defined(PETSC_HAVE_MUMPS)
319800de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
319967877ebaSShri Abhyankar #endif
320000de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C","MatInvertBlockDiagonal_SeqBAIJ",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
320100de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_SeqBAIJ",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
320200de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_SeqBAIJ",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
320300de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C","MatSeqBAIJSetColumnIndices_SeqBAIJ",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
320400de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C","MatConvert_SeqBAIJ_SeqAIJ",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
320500de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C","MatConvert_SeqBAIJ_SeqSBAIJ",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
320600de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C","MatSeqBAIJSetPreallocation_SeqBAIJ",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
320700de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C","MatSeqBAIJSetPreallocationCSR_SeqBAIJ",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
320800de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C","MatConvert_SeqBAIJ_SeqBSTRM",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
320900de8ff0SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C","MatIsTranspose_SeqBAIJ",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
321017667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
32113a40ed3dSBarry Smith   PetscFunctionReturn(0);
32122593348eSBarry Smith }
32132593348eSBarry Smith 
32144a2ae208SSatish Balay #undef __FUNCT__
3215b24902e0SBarry Smith #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3216ace3abfcSBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
32172593348eSBarry Smith {
3218b24902e0SBarry Smith   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
32196849ba73SBarry Smith   PetscErrorCode ierr;
3220a96a251dSBarry Smith   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3221de6a44a3SBarry Smith 
32223a40ed3dSBarry Smith   PetscFunctionBegin;
3223e32f2f54SBarry Smith   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
32242593348eSBarry Smith 
32254fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32264fd072dbSBarry Smith     c->imax           = a->imax;
32274fd072dbSBarry Smith     c->ilen           = a->ilen;
32284fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
32294fd072dbSBarry Smith   } else {
323033b91e9fSSatish Balay     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
32314fd072dbSBarry Smith     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3232b6490206SBarry Smith     for (i=0; i<mbs; i++) {
32332593348eSBarry Smith       c->imax[i] = a->imax[i];
32342593348eSBarry Smith       c->ilen[i] = a->ilen[i];
32352593348eSBarry Smith     }
32364fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
32374fd072dbSBarry Smith   }
32382593348eSBarry Smith 
32392593348eSBarry Smith   /* allocate the matrix space */
324016a2bf60SHong Zhang   if (mallocmatspace) {
32414fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32424fd072dbSBarry Smith       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
32434fd072dbSBarry Smith       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3244379be0ddSLisandro Dalcin       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
324526fbe8dcSKarl Rupp 
32464fd072dbSBarry Smith       c->i            = a->i;
32474fd072dbSBarry Smith       c->j            = a->j;
3248379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3249379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3250379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
32514fd072dbSBarry Smith       c->parent       = A;
32521e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32531e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
325426fbe8dcSKarl Rupp 
32554fd072dbSBarry Smith       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
32564fd072dbSBarry Smith       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32574fd072dbSBarry Smith       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
32584fd072dbSBarry Smith     } else {
3259a96a251dSBarry Smith       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
326016a2bf60SHong Zhang       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
326126fbe8dcSKarl Rupp 
3262c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3263379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
32644fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
326526fbe8dcSKarl Rupp 
3266c1ac3661SBarry Smith       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3267b6490206SBarry Smith       if (mbs > 0) {
3268c1ac3661SBarry Smith         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
32692e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
3270549d3d68SSatish Balay           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
32712e8a6d31SBarry Smith         } else {
3272549d3d68SSatish Balay           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
32732593348eSBarry Smith         }
32742593348eSBarry Smith       }
32751e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
32761e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
327716a2bf60SHong Zhang     }
32784fd072dbSBarry Smith   }
327916a2bf60SHong Zhang 
32802593348eSBarry Smith   c->roworiented = a->roworiented;
32812593348eSBarry Smith   c->nonew       = a->nonew;
328226fbe8dcSKarl Rupp 
32831e1e43feSBarry Smith   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
32841e1e43feSBarry Smith   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
328526fbe8dcSKarl Rupp 
32865c9eb25fSBarry Smith   c->bs2         = a->bs2;
32875c9eb25fSBarry Smith   c->mbs         = a->mbs;
32885c9eb25fSBarry Smith   c->nbs         = a->nbs;
32892593348eSBarry Smith 
32902593348eSBarry Smith   if (a->diag) {
32914fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
32924fd072dbSBarry Smith       c->diag      = a->diag;
32934fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
32944fd072dbSBarry Smith     } else {
3295c1ac3661SBarry Smith       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
329652e6d16bSBarry Smith       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
329726fbe8dcSKarl Rupp       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
32984fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
32994fd072dbSBarry Smith     }
330098305bb5SBarry Smith   } else c->diag = 0;
330126fbe8dcSKarl Rupp 
33022593348eSBarry Smith   c->nz         = a->nz;
3303f2cbd3d5SJed Brown   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
33042593348eSBarry Smith   c->solve_work = 0;
33057fc0212eSBarry Smith   c->mult_work  = 0;
330688e51ccdSHong Zhang 
330788e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
330888e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3309cd6b891eSBarry Smith   c->compressedrow.check = a->compressedrow.check;
3310cd6b891eSBarry Smith   if (a->compressedrow.use) {
331188e51ccdSHong Zhang     i    = a->compressedrow.nrows;
33120e83c824SBarry Smith     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
33134fd072dbSBarry Smith     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
331488e51ccdSHong Zhang     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
331588e51ccdSHong Zhang     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
331688e51ccdSHong Zhang   } else {
331788e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
33180298fd71SBarry Smith     c->compressedrow.i      = NULL;
33190298fd71SBarry Smith     c->compressedrow.rindex = NULL;
332088e51ccdSHong Zhang   }
332188e51ccdSHong Zhang   C->same_nonzero = A->same_nonzero;
332226fbe8dcSKarl Rupp 
3323140e18c1SBarry Smith   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
33245d5aaa0eSBarry Smith   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
33253a40ed3dSBarry Smith   PetscFunctionReturn(0);
33262593348eSBarry Smith }
33272593348eSBarry Smith 
33284a2ae208SSatish Balay #undef __FUNCT__
3329b24902e0SBarry Smith #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3330b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3331b24902e0SBarry Smith {
3332b24902e0SBarry Smith   PetscErrorCode ierr;
3333b24902e0SBarry Smith 
3334b24902e0SBarry Smith   PetscFunctionBegin;
3335ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3336d0f46423SBarry Smith   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
33375c9eb25fSBarry Smith   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
333898ad0f72SJed Brown   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3339b24902e0SBarry Smith   PetscFunctionReturn(0);
3340b24902e0SBarry Smith }
3341b24902e0SBarry Smith 
3342b24902e0SBarry Smith #undef __FUNCT__
33435bba2384SShri Abhyankar #define __FUNCT__ "MatLoad_SeqBAIJ"
3344112444f4SShri Abhyankar PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3345f501eaabSShri Abhyankar {
3346f501eaabSShri Abhyankar   Mat_SeqBAIJ    *a;
3347f501eaabSShri Abhyankar   PetscErrorCode ierr;
3348f501eaabSShri Abhyankar   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3349f501eaabSShri Abhyankar   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3350f501eaabSShri Abhyankar   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3351f501eaabSShri Abhyankar   PetscInt       *masked,nmask,tmp,bs2,ishift;
3352f501eaabSShri Abhyankar   PetscMPIInt    size;
3353f501eaabSShri Abhyankar   int            fd;
3354f501eaabSShri Abhyankar   PetscScalar    *aa;
3355ce94432eSBarry Smith   MPI_Comm       comm;
3356f501eaabSShri Abhyankar 
3357f501eaabSShri Abhyankar   PetscFunctionBegin;
3358ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
33590298fd71SBarry Smith   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
33600298fd71SBarry Smith   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3361f501eaabSShri Abhyankar   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3362f501eaabSShri Abhyankar   bs2  = bs*bs;
3363f501eaabSShri Abhyankar 
3364f501eaabSShri Abhyankar   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3365f501eaabSShri Abhyankar   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3366f501eaabSShri Abhyankar   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3367f501eaabSShri Abhyankar   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3368f501eaabSShri Abhyankar   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3369f501eaabSShri Abhyankar   M = header[1]; N = header[2]; nz = header[3];
3370f501eaabSShri Abhyankar 
3371f501eaabSShri Abhyankar   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3372f501eaabSShri Abhyankar   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3373f501eaabSShri Abhyankar 
3374f501eaabSShri Abhyankar   /*
3375f501eaabSShri Abhyankar      This code adds extra rows to make sure the number of rows is
3376f501eaabSShri Abhyankar     divisible by the blocksize
3377f501eaabSShri Abhyankar   */
3378f501eaabSShri Abhyankar   mbs        = M/bs;
3379f501eaabSShri Abhyankar   extra_rows = bs - M + bs*(mbs);
3380f501eaabSShri Abhyankar   if (extra_rows == bs) extra_rows = 0;
3381f501eaabSShri Abhyankar   else mbs++;
3382f501eaabSShri Abhyankar   if (extra_rows) {
3383f501eaabSShri Abhyankar     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3384f501eaabSShri Abhyankar   }
3385f501eaabSShri Abhyankar 
3386f501eaabSShri Abhyankar   /* Set global sizes if not already set */
3387f501eaabSShri Abhyankar   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3388f501eaabSShri Abhyankar     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3389f501eaabSShri Abhyankar   } else { /* Check if the matrix global sizes are correct */
3390f501eaabSShri Abhyankar     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
33914c5b953cSHong Zhang     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
33924c5b953cSHong Zhang       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
33934c5b953cSHong Zhang     }
3394f501eaabSShri Abhyankar     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3395f501eaabSShri Abhyankar   }
3396f501eaabSShri Abhyankar 
3397f501eaabSShri Abhyankar   /* read in row lengths */
3398f501eaabSShri Abhyankar   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3399f501eaabSShri Abhyankar   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3400f501eaabSShri Abhyankar   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3401f501eaabSShri Abhyankar 
3402f501eaabSShri Abhyankar   /* read in column indices */
3403f501eaabSShri Abhyankar   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3404f501eaabSShri Abhyankar   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3405f501eaabSShri Abhyankar   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3406f501eaabSShri Abhyankar 
3407f501eaabSShri Abhyankar   /* loop over row lengths determining block row lengths */
3408f501eaabSShri Abhyankar   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3409f501eaabSShri Abhyankar   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3410f501eaabSShri Abhyankar   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3411f501eaabSShri Abhyankar   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3412f501eaabSShri Abhyankar   rowcount = 0;
3413f501eaabSShri Abhyankar   nzcount  = 0;
3414f501eaabSShri Abhyankar   for (i=0; i<mbs; i++) {
3415f501eaabSShri Abhyankar     nmask = 0;
3416f501eaabSShri Abhyankar     for (j=0; j<bs; j++) {
3417f501eaabSShri Abhyankar       kmax = rowlengths[rowcount];
3418f501eaabSShri Abhyankar       for (k=0; k<kmax; k++) {
3419f501eaabSShri Abhyankar         tmp = jj[nzcount++]/bs;
3420f501eaabSShri Abhyankar         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3421f501eaabSShri Abhyankar       }
3422f501eaabSShri Abhyankar       rowcount++;
3423f501eaabSShri Abhyankar     }
3424f501eaabSShri Abhyankar     browlengths[i] += nmask;
3425f501eaabSShri Abhyankar     /* zero out the mask elements we set */
3426f501eaabSShri Abhyankar     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3427f501eaabSShri Abhyankar   }
3428f501eaabSShri Abhyankar 
34292f480046SShri Abhyankar   /* Do preallocation  */
3430f501eaabSShri Abhyankar   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3431f501eaabSShri Abhyankar   a    = (Mat_SeqBAIJ*)newmat->data;
3432f501eaabSShri Abhyankar 
3433f501eaabSShri Abhyankar   /* set matrix "i" values */
3434f501eaabSShri Abhyankar   a->i[0] = 0;
3435f501eaabSShri Abhyankar   for (i=1; i<= mbs; i++) {
3436f501eaabSShri Abhyankar     a->i[i]      = a->i[i-1] + browlengths[i-1];
3437f501eaabSShri Abhyankar     a->ilen[i-1] = browlengths[i-1];
3438f501eaabSShri Abhyankar   }
3439f501eaabSShri Abhyankar   a->nz = 0;
3440f501eaabSShri Abhyankar   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3441f501eaabSShri Abhyankar 
3442f501eaabSShri Abhyankar   /* read in nonzero values */
3443f501eaabSShri Abhyankar   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3444f501eaabSShri Abhyankar   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3445f501eaabSShri Abhyankar   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3446f501eaabSShri Abhyankar 
3447f501eaabSShri Abhyankar   /* set "a" and "j" values into matrix */
3448f501eaabSShri Abhyankar   nzcount = 0; jcount = 0;
3449f501eaabSShri Abhyankar   for (i=0; i<mbs; i++) {
3450f501eaabSShri Abhyankar     nzcountb = nzcount;
3451f501eaabSShri Abhyankar     nmask    = 0;
3452f501eaabSShri Abhyankar     for (j=0; j<bs; j++) {
3453f501eaabSShri Abhyankar       kmax = rowlengths[i*bs+j];
3454f501eaabSShri Abhyankar       for (k=0; k<kmax; k++) {
3455f501eaabSShri Abhyankar         tmp = jj[nzcount++]/bs;
3456f501eaabSShri Abhyankar         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3457f501eaabSShri Abhyankar       }
3458f501eaabSShri Abhyankar     }
3459f501eaabSShri Abhyankar     /* sort the masked values */
3460f501eaabSShri Abhyankar     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3461f501eaabSShri Abhyankar 
3462f501eaabSShri Abhyankar     /* set "j" values into matrix */
3463f501eaabSShri Abhyankar     maskcount = 1;
3464f501eaabSShri Abhyankar     for (j=0; j<nmask; j++) {
3465f501eaabSShri Abhyankar       a->j[jcount++]  = masked[j];
3466f501eaabSShri Abhyankar       mask[masked[j]] = maskcount++;
3467f501eaabSShri Abhyankar     }
3468f501eaabSShri Abhyankar     /* set "a" values into matrix */
3469f501eaabSShri Abhyankar     ishift = bs2*a->i[i];
3470f501eaabSShri Abhyankar     for (j=0; j<bs; j++) {
3471f501eaabSShri Abhyankar       kmax = rowlengths[i*bs+j];
3472f501eaabSShri Abhyankar       for (k=0; k<kmax; k++) {
3473f501eaabSShri Abhyankar         tmp       = jj[nzcountb]/bs;
3474f501eaabSShri Abhyankar         block     = mask[tmp] - 1;
3475f501eaabSShri Abhyankar         point     = jj[nzcountb] - bs*tmp;
3476f501eaabSShri Abhyankar         idx       = ishift + bs2*block + j + bs*point;
3477f501eaabSShri Abhyankar         a->a[idx] = (MatScalar)aa[nzcountb++];
3478f501eaabSShri Abhyankar       }
3479f501eaabSShri Abhyankar     }
3480f501eaabSShri Abhyankar     /* zero out the mask elements we set */
3481f501eaabSShri Abhyankar     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3482f501eaabSShri Abhyankar   }
3483f501eaabSShri Abhyankar   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3484f501eaabSShri Abhyankar 
3485f501eaabSShri Abhyankar   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3486f501eaabSShri Abhyankar   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3487f501eaabSShri Abhyankar   ierr = PetscFree(aa);CHKERRQ(ierr);
3488f501eaabSShri Abhyankar   ierr = PetscFree(jj);CHKERRQ(ierr);
3489f501eaabSShri Abhyankar   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3490f501eaabSShri Abhyankar 
3491f501eaabSShri Abhyankar   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3492f501eaabSShri Abhyankar   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3493f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3494f501eaabSShri Abhyankar }
3495f501eaabSShri Abhyankar 
3496f501eaabSShri Abhyankar #undef __FUNCT__
34974a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ"
3498273d9f13SBarry Smith /*@C
3499273d9f13SBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3500273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3501273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3502273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3503273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
35042593348eSBarry Smith 
3505273d9f13SBarry Smith    Collective on MPI_Comm
3506273d9f13SBarry Smith 
3507273d9f13SBarry Smith    Input Parameters:
3508273d9f13SBarry Smith +  comm - MPI communicator, set to PETSC_COMM_SELF
3509273d9f13SBarry Smith .  bs - size of block
3510273d9f13SBarry Smith .  m - number of rows
3511273d9f13SBarry Smith .  n - number of columns
351235d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
351335d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
35140298fd71SBarry Smith          (possibly different for each block row) or NULL
3515273d9f13SBarry Smith 
3516273d9f13SBarry Smith    Output Parameter:
3517273d9f13SBarry Smith .  A - the matrix
3518273d9f13SBarry Smith 
3519175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3520ae1d86c5SBarry Smith    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3521175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3522175b88e8SBarry Smith 
3523273d9f13SBarry Smith    Options Database Keys:
3524273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
3525273d9f13SBarry Smith                      block calculations (much slower)
3526273d9f13SBarry Smith .    -mat_block_size - size of the blocks to use
3527273d9f13SBarry Smith 
3528273d9f13SBarry Smith    Level: intermediate
3529273d9f13SBarry Smith 
3530273d9f13SBarry Smith    Notes:
3531d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3532d1be2dadSMatthew Knepley 
353349a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
353449a6f317SBarry Smith 
353535d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
353635d8aa7fSBarry Smith 
3537273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3538273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3539273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3540273d9f13SBarry Smith 
3541273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
35420298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
35430598bfebSBarry Smith    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3544273d9f13SBarry Smith    matrices.
3545273d9f13SBarry Smith 
354669b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3547273d9f13SBarry Smith @*/
35487087cfbeSBarry Smith PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3549273d9f13SBarry Smith {
3550dfbe8321SBarry Smith   PetscErrorCode ierr;
3551273d9f13SBarry Smith 
3552273d9f13SBarry Smith   PetscFunctionBegin;
3553f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3554f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3555273d9f13SBarry Smith   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3556ab93d7beSBarry Smith   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3557273d9f13SBarry Smith   PetscFunctionReturn(0);
3558273d9f13SBarry Smith }
3559273d9f13SBarry Smith 
35604a2ae208SSatish Balay #undef __FUNCT__
35614a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3562273d9f13SBarry Smith /*@C
3563273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3564273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3565273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3566273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3567273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3568273d9f13SBarry Smith 
3569273d9f13SBarry Smith    Collective on MPI_Comm
3570273d9f13SBarry Smith 
3571273d9f13SBarry Smith    Input Parameters:
3572273d9f13SBarry Smith +  A - the matrix
3573273d9f13SBarry Smith .  bs - size of block
3574273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3575273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
35760298fd71SBarry Smith          (possibly different for each block row) or NULL
3577273d9f13SBarry Smith 
3578273d9f13SBarry Smith    Options Database Keys:
3579273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
3580273d9f13SBarry Smith                      block calculations (much slower)
3581273d9f13SBarry Smith .    -mat_block_size - size of the blocks to use
3582273d9f13SBarry Smith 
3583273d9f13SBarry Smith    Level: intermediate
3584273d9f13SBarry Smith 
3585273d9f13SBarry Smith    Notes:
358649a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
358749a6f317SBarry Smith 
3588aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3589aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3590aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3591aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3592aa95bbe8SBarry Smith 
3593273d9f13SBarry Smith    The block AIJ format is fully compatible with standard Fortran 77
3594273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3595273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3596273d9f13SBarry Smith 
3597273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
35980298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
35990598bfebSBarry Smith    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3600273d9f13SBarry Smith 
360169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3602273d9f13SBarry Smith @*/
36037087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3604273d9f13SBarry Smith {
36054ac538c5SBarry Smith   PetscErrorCode ierr;
3606273d9f13SBarry Smith 
3607273d9f13SBarry Smith   PetscFunctionBegin;
36086ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
36096ba663aaSJed Brown   PetscValidType(B,1);
36106ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
36114ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3612273d9f13SBarry Smith   PetscFunctionReturn(0);
3613273d9f13SBarry Smith }
3614a1d92eedSBarry Smith 
3615c75a6043SHong Zhang #undef __FUNCT__
3616725b52f3SLisandro Dalcin #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3617725b52f3SLisandro Dalcin /*@C
3618725b52f3SLisandro Dalcin    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3619725b52f3SLisandro Dalcin    (the default sequential PETSc format).
3620725b52f3SLisandro Dalcin 
3621725b52f3SLisandro Dalcin    Collective on MPI_Comm
3622725b52f3SLisandro Dalcin 
3623725b52f3SLisandro Dalcin    Input Parameters:
3624725b52f3SLisandro Dalcin +  A - the matrix
3625725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3626725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3627725b52f3SLisandro Dalcin -  v - optional values in the matrix
3628725b52f3SLisandro Dalcin 
3629725b52f3SLisandro Dalcin    Level: developer
3630725b52f3SLisandro Dalcin 
3631725b52f3SLisandro Dalcin .keywords: matrix, aij, compressed row, sparse
3632725b52f3SLisandro Dalcin 
3633725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3634725b52f3SLisandro Dalcin @*/
36357087cfbeSBarry Smith PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3636725b52f3SLisandro Dalcin {
36374ac538c5SBarry Smith   PetscErrorCode ierr;
3638725b52f3SLisandro Dalcin 
3639725b52f3SLisandro Dalcin   PetscFunctionBegin;
36406ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
36416ba663aaSJed Brown   PetscValidType(B,1);
36426ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
36434ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3644725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3645725b52f3SLisandro Dalcin }
3646725b52f3SLisandro Dalcin 
3647725b52f3SLisandro Dalcin 
3648725b52f3SLisandro Dalcin #undef __FUNCT__
3649c75a6043SHong Zhang #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3650c75a6043SHong Zhang /*@
3651dfb205c3SBarry Smith      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3652c75a6043SHong Zhang 
3653c75a6043SHong Zhang      Collective on MPI_Comm
3654c75a6043SHong Zhang 
3655c75a6043SHong Zhang    Input Parameters:
3656c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3657c75a6043SHong Zhang .  bs - size of block
3658c75a6043SHong Zhang .  m - number of rows
3659c75a6043SHong Zhang .  n - number of columns
3660c75a6043SHong Zhang .  i - row indices
3661c75a6043SHong Zhang .  j - column indices
3662c75a6043SHong Zhang -  a - matrix values
3663c75a6043SHong Zhang 
3664c75a6043SHong Zhang    Output Parameter:
3665c75a6043SHong Zhang .  mat - the matrix
3666c75a6043SHong Zhang 
3667dfb205c3SBarry Smith    Level: advanced
3668c75a6043SHong Zhang 
3669c75a6043SHong Zhang    Notes:
3670c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3671c75a6043SHong Zhang     once the matrix is destroyed
3672c75a6043SHong Zhang 
3673c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3674c75a6043SHong Zhang 
3675c75a6043SHong Zhang        The i and j indices are 0 based
3676c75a6043SHong Zhang 
3677dfb205c3SBarry Smith        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3678dfb205c3SBarry Smith 
3679dfb205c3SBarry Smith 
368069b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3681c75a6043SHong Zhang 
3682c75a6043SHong Zhang @*/
36837087cfbeSBarry Smith PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3684c75a6043SHong Zhang {
3685c75a6043SHong Zhang   PetscErrorCode ierr;
3686c75a6043SHong Zhang   PetscInt       ii;
3687c75a6043SHong Zhang   Mat_SeqBAIJ    *baij;
3688c75a6043SHong Zhang 
3689c75a6043SHong Zhang   PetscFunctionBegin;
3690e32f2f54SBarry Smith   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3691e32f2f54SBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3692c75a6043SHong Zhang 
3693c75a6043SHong Zhang   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3694c75a6043SHong Zhang   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3695c75a6043SHong Zhang   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3696c75a6043SHong Zhang   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3697c75a6043SHong Zhang   baij = (Mat_SeqBAIJ*)(*mat)->data;
3698c75a6043SHong Zhang   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
36991784c0f5SBarry Smith   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3700c75a6043SHong Zhang 
3701c75a6043SHong Zhang   baij->i = i;
3702c75a6043SHong Zhang   baij->j = j;
3703c75a6043SHong Zhang   baij->a = a;
370426fbe8dcSKarl Rupp 
3705c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3706c75a6043SHong Zhang   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3707e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3708e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3709c75a6043SHong Zhang 
3710c75a6043SHong Zhang   for (ii=0; ii<m; ii++) {
3711c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3712c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG)
3713e32f2f54SBarry Smith     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3714c75a6043SHong Zhang #endif
3715c75a6043SHong Zhang   }
3716c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG)
3717c75a6043SHong Zhang   for (ii=0; ii<baij->i[m]; ii++) {
3718e32f2f54SBarry Smith     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3719e32f2f54SBarry Smith     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3720c75a6043SHong Zhang   }
3721c75a6043SHong Zhang #endif
3722c75a6043SHong Zhang 
3723c75a6043SHong Zhang   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3724c75a6043SHong Zhang   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3725c75a6043SHong Zhang   PetscFunctionReturn(0);
3726c75a6043SHong Zhang }
3727