xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision d24d420451aab1d024a728b511d7b61b40627436)
1*d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2*d24d4204SJose E. Roman 
3*d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
4*d24d4204SJose E. Roman 
5*d24d4204SJose E. Roman /*
6*d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7*d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8*d24d4204SJose E. Roman */
9*d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10*d24d4204SJose E. Roman 
11*d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12*d24d4204SJose E. Roman {
13*d24d4204SJose E. Roman   PetscErrorCode    ierr;
14*d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
15*d24d4204SJose E. Roman   PetscBool         iascii;
16*d24d4204SJose E. Roman   PetscViewerFormat format;
17*d24d4204SJose E. Roman   Mat               Adense;
18*d24d4204SJose E. Roman 
19*d24d4204SJose E. Roman   PetscFunctionBegin;
20*d24d4204SJose E. Roman   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21*d24d4204SJose E. Roman   if (iascii) {
22*d24d4204SJose E. Roman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23*d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24*d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25*d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26*d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27*d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28*d24d4204SJose E. Roman       PetscFunctionReturn(0);
29*d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30*d24d4204SJose E. Roman       PetscFunctionReturn(0);
31*d24d4204SJose E. Roman     }
32*d24d4204SJose E. Roman   }
33*d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
34*d24d4204SJose E. Roman   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35*d24d4204SJose E. Roman   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36*d24d4204SJose E. Roman   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37*d24d4204SJose E. Roman   PetscFunctionReturn(0);
38*d24d4204SJose E. Roman }
39*d24d4204SJose E. Roman 
40*d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41*d24d4204SJose E. Roman {
42*d24d4204SJose E. Roman   PetscErrorCode ierr;
43*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
44*d24d4204SJose E. Roman   PetscLogDouble isend[2],irecv[2];
45*d24d4204SJose E. Roman 
46*d24d4204SJose E. Roman   PetscFunctionBegin;
47*d24d4204SJose E. Roman   info->block_size = 1.0;
48*d24d4204SJose E. Roman 
49*d24d4204SJose E. Roman   isend[0] = a->lld*a->locc;     /* locally allocated */
50*d24d4204SJose E. Roman   isend[1] = a->locr*a->locc;    /* used submatrix */
51*d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52*d24d4204SJose E. Roman     info->nz_allocated   = isend[0];
53*d24d4204SJose E. Roman     info->nz_used        = isend[1];
54*d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
55*d24d4204SJose E. Roman     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56*d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
57*d24d4204SJose E. Roman     info->nz_used        = irecv[1];
58*d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
59*d24d4204SJose E. Roman     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60*d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
61*d24d4204SJose E. Roman     info->nz_used        = irecv[1];
62*d24d4204SJose E. Roman   }
63*d24d4204SJose E. Roman 
64*d24d4204SJose E. Roman   info->nz_unneeded       = 0;
65*d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
66*d24d4204SJose E. Roman   info->mallocs           = 0;
67*d24d4204SJose E. Roman   info->memory            = ((PetscObject)A)->mem;
68*d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
69*d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
70*d24d4204SJose E. Roman   info->factor_mallocs    = 0;
71*d24d4204SJose E. Roman   PetscFunctionReturn(0);
72*d24d4204SJose E. Roman }
73*d24d4204SJose E. Roman 
74*d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75*d24d4204SJose E. Roman {
76*d24d4204SJose E. Roman   PetscFunctionBegin;
77*d24d4204SJose E. Roman   switch (op) {
78*d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATIONS:
79*d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATION_ERR:
80*d24d4204SJose E. Roman     case MAT_NEW_NONZERO_ALLOCATION_ERR:
81*d24d4204SJose E. Roman     case MAT_SYMMETRIC:
82*d24d4204SJose E. Roman     case MAT_SORTED_FULL:
83*d24d4204SJose E. Roman     case MAT_HERMITIAN:
84*d24d4204SJose E. Roman       break;
85*d24d4204SJose E. Roman     default:
86*d24d4204SJose E. Roman       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87*d24d4204SJose E. Roman   }
88*d24d4204SJose E. Roman   PetscFunctionReturn(0);
89*d24d4204SJose E. Roman }
90*d24d4204SJose E. Roman 
91*d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
92*d24d4204SJose E. Roman {
93*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
94*d24d4204SJose E. Roman   PetscErrorCode ierr;
95*d24d4204SJose E. Roman   PetscInt       i,j;
96*d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
97*d24d4204SJose E. Roman 
98*d24d4204SJose E. Roman   PetscFunctionBegin;
99*d24d4204SJose E. Roman   for (i=0;i<nr;i++) {
100*d24d4204SJose E. Roman     if (rows[i] < 0) continue;
101*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
102*d24d4204SJose E. Roman     for (j=0;j<nc;j++) {
103*d24d4204SJose E. Roman       if (cols[j] < 0) continue;
104*d24d4204SJose E. Roman       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
105*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
106*d24d4204SJose E. Roman       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
107*d24d4204SJose E. Roman         switch (imode) {
108*d24d4204SJose E. Roman           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
109*d24d4204SJose E. Roman           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
110*d24d4204SJose E. Roman           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
111*d24d4204SJose E. Roman         }
112*d24d4204SJose E. Roman       } else {
113*d24d4204SJose E. Roman         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
114*d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
115*d24d4204SJose E. Roman         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
116*d24d4204SJose E. Roman       }
117*d24d4204SJose E. Roman     }
118*d24d4204SJose E. Roman   }
119*d24d4204SJose E. Roman   PetscFunctionReturn(0);
120*d24d4204SJose E. Roman }
121*d24d4204SJose E. Roman 
122*d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
123*d24d4204SJose E. Roman {
124*d24d4204SJose E. Roman   PetscErrorCode ierr;
125*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
126*d24d4204SJose E. Roman   PetscScalar    *x2d,*y2d,alpha=1.0;
127*d24d4204SJose E. Roman   const PetscInt *ranges;
128*d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
129*d24d4204SJose E. Roman 
130*d24d4204SJose E. Roman   PetscFunctionBegin;
131*d24d4204SJose E. Roman   if (transpose) {
132*d24d4204SJose E. Roman 
133*d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
134*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
135*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
136*d24d4204SJose E. Roman     xlld = PetscMax(1,A->rmap->n);
137*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
138*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
139*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
140*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
141*d24d4204SJose E. Roman     ylld = 1;
142*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
143*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
144*d24d4204SJose E. Roman 
145*d24d4204SJose E. Roman     /* allocate 2d vectors */
146*d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
147*d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
148*d24d4204SJose E. Roman     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
149*d24d4204SJose E. Roman     xlld = PetscMax(1,lszx);
150*d24d4204SJose E. Roman 
151*d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
152*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
153*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
154*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
155*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
156*d24d4204SJose E. Roman 
157*d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
158*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
159*d24d4204SJose E. Roman 
160*d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
161*d24d4204SJose E. Roman     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
162*d24d4204SJose E. Roman 
163*d24d4204SJose E. Roman     /* call PBLAS subroutine */
164*d24d4204SJose E. Roman     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
165*d24d4204SJose E. Roman 
166*d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
167*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
168*d24d4204SJose E. Roman 
169*d24d4204SJose E. Roman   } else {   /* non-transpose */
170*d24d4204SJose E. Roman 
171*d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
172*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
173*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
174*d24d4204SJose E. Roman     xlld = 1;
175*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
176*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
177*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
178*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
179*d24d4204SJose E. Roman     ylld = PetscMax(1,A->rmap->n);
180*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
181*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
182*d24d4204SJose E. Roman 
183*d24d4204SJose E. Roman     /* allocate 2d vectors */
184*d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
185*d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
186*d24d4204SJose E. Roman     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
187*d24d4204SJose E. Roman     ylld = PetscMax(1,lszy);
188*d24d4204SJose E. Roman 
189*d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
190*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
191*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
192*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
193*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
194*d24d4204SJose E. Roman 
195*d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
196*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
197*d24d4204SJose E. Roman 
198*d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
199*d24d4204SJose E. Roman     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
200*d24d4204SJose E. Roman 
201*d24d4204SJose E. Roman     /* call PBLAS subroutine */
202*d24d4204SJose E. Roman     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
203*d24d4204SJose E. Roman 
204*d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
205*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
206*d24d4204SJose E. Roman 
207*d24d4204SJose E. Roman   }
208*d24d4204SJose E. Roman   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
209*d24d4204SJose E. Roman   PetscFunctionReturn(0);
210*d24d4204SJose E. Roman }
211*d24d4204SJose E. Roman 
212*d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213*d24d4204SJose E. Roman {
214*d24d4204SJose E. Roman   PetscErrorCode    ierr;
215*d24d4204SJose E. Roman   const PetscScalar *xarray;
216*d24d4204SJose E. Roman   PetscScalar       *yarray;
217*d24d4204SJose E. Roman 
218*d24d4204SJose E. Roman   PetscFunctionBegin;
219*d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220*d24d4204SJose E. Roman   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221*d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222*d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223*d24d4204SJose E. Roman   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224*d24d4204SJose E. Roman   PetscFunctionReturn(0);
225*d24d4204SJose E. Roman }
226*d24d4204SJose E. Roman 
227*d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228*d24d4204SJose E. Roman {
229*d24d4204SJose E. Roman   PetscErrorCode    ierr;
230*d24d4204SJose E. Roman   const PetscScalar *xarray;
231*d24d4204SJose E. Roman   PetscScalar       *yarray;
232*d24d4204SJose E. Roman 
233*d24d4204SJose E. Roman   PetscFunctionBegin;
234*d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235*d24d4204SJose E. Roman   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236*d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237*d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238*d24d4204SJose E. Roman   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239*d24d4204SJose E. Roman   PetscFunctionReturn(0);
240*d24d4204SJose E. Roman }
241*d24d4204SJose E. Roman 
242*d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243*d24d4204SJose E. Roman {
244*d24d4204SJose E. Roman   PetscErrorCode    ierr;
245*d24d4204SJose E. Roman   const PetscScalar *xarray;
246*d24d4204SJose E. Roman   PetscScalar       *zarray;
247*d24d4204SJose E. Roman 
248*d24d4204SJose E. Roman   PetscFunctionBegin;
249*d24d4204SJose E. Roman   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250*d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251*d24d4204SJose E. Roman   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252*d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253*d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254*d24d4204SJose E. Roman   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255*d24d4204SJose E. Roman   PetscFunctionReturn(0);
256*d24d4204SJose E. Roman }
257*d24d4204SJose E. Roman 
258*d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259*d24d4204SJose E. Roman {
260*d24d4204SJose E. Roman   PetscErrorCode    ierr;
261*d24d4204SJose E. Roman   const PetscScalar *xarray;
262*d24d4204SJose E. Roman   PetscScalar       *zarray;
263*d24d4204SJose E. Roman 
264*d24d4204SJose E. Roman   PetscFunctionBegin;
265*d24d4204SJose E. Roman   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266*d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267*d24d4204SJose E. Roman   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268*d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269*d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270*d24d4204SJose E. Roman   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271*d24d4204SJose E. Roman   PetscFunctionReturn(0);
272*d24d4204SJose E. Roman }
273*d24d4204SJose E. Roman 
274*d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275*d24d4204SJose E. Roman {
276*d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277*d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278*d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279*d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
280*d24d4204SJose E. Roman   PetscBLASInt  one=1;
281*d24d4204SJose E. Roman 
282*d24d4204SJose E. Roman   PetscFunctionBegin;
283*d24d4204SJose E. Roman   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284*d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
285*d24d4204SJose E. Roman   PetscFunctionReturn(0);
286*d24d4204SJose E. Roman }
287*d24d4204SJose E. Roman 
288*d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289*d24d4204SJose E. Roman {
290*d24d4204SJose E. Roman   PetscErrorCode ierr;
291*d24d4204SJose E. Roman 
292*d24d4204SJose E. Roman   PetscFunctionBegin;
293*d24d4204SJose E. Roman   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
294*d24d4204SJose E. Roman   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
295*d24d4204SJose E. Roman   ierr = MatSetUp(C);CHKERRQ(ierr);
296*d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297*d24d4204SJose E. Roman   PetscFunctionReturn(0);
298*d24d4204SJose E. Roman }
299*d24d4204SJose E. Roman 
300*d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301*d24d4204SJose E. Roman {
302*d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303*d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304*d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305*d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
306*d24d4204SJose E. Roman   PetscBLASInt  one=1;
307*d24d4204SJose E. Roman 
308*d24d4204SJose E. Roman   PetscFunctionBegin;
309*d24d4204SJose E. Roman   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310*d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
311*d24d4204SJose E. Roman   PetscFunctionReturn(0);
312*d24d4204SJose E. Roman }
313*d24d4204SJose E. Roman 
314*d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315*d24d4204SJose E. Roman {
316*d24d4204SJose E. Roman   PetscErrorCode ierr;
317*d24d4204SJose E. Roman 
318*d24d4204SJose E. Roman   PetscFunctionBegin;
319*d24d4204SJose E. Roman   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320*d24d4204SJose E. Roman   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321*d24d4204SJose E. Roman   ierr = MatSetUp(C);CHKERRQ(ierr);
322*d24d4204SJose E. Roman   PetscFunctionReturn(0);
323*d24d4204SJose E. Roman }
324*d24d4204SJose E. Roman 
325*d24d4204SJose E. Roman /* --------------------------------------- */
326*d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327*d24d4204SJose E. Roman {
328*d24d4204SJose E. Roman   PetscFunctionBegin;
329*d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330*d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
331*d24d4204SJose E. Roman   PetscFunctionReturn(0);
332*d24d4204SJose E. Roman }
333*d24d4204SJose E. Roman 
334*d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335*d24d4204SJose E. Roman {
336*d24d4204SJose E. Roman   PetscFunctionBegin;
337*d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338*d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
339*d24d4204SJose E. Roman   PetscFunctionReturn(0);
340*d24d4204SJose E. Roman }
341*d24d4204SJose E. Roman 
342*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343*d24d4204SJose E. Roman {
344*d24d4204SJose E. Roman   PetscErrorCode ierr;
345*d24d4204SJose E. Roman   Mat_Product    *product = C->product;
346*d24d4204SJose E. Roman 
347*d24d4204SJose E. Roman   PetscFunctionBegin;
348*d24d4204SJose E. Roman   switch (product->type) {
349*d24d4204SJose E. Roman     case MATPRODUCT_AB:
350*d24d4204SJose E. Roman       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
351*d24d4204SJose E. Roman       break;
352*d24d4204SJose E. Roman     case MATPRODUCT_ABt:
353*d24d4204SJose E. Roman       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
354*d24d4204SJose E. Roman       break;
355*d24d4204SJose E. Roman     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356*d24d4204SJose E. Roman   }
357*d24d4204SJose E. Roman   PetscFunctionReturn(0);
358*d24d4204SJose E. Roman }
359*d24d4204SJose E. Roman /* --------------------------------------- */
360*d24d4204SJose E. Roman 
361*d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
362*d24d4204SJose E. Roman {
363*d24d4204SJose E. Roman   PetscErrorCode    ierr;
364*d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
365*d24d4204SJose E. Roman   PetscScalar       *darray,*d2d,v;
366*d24d4204SJose E. Roman   const PetscInt    *ranges;
367*d24d4204SJose E. Roman   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
368*d24d4204SJose E. Roman 
369*d24d4204SJose E. Roman   PetscFunctionBegin;
370*d24d4204SJose E. Roman   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
371*d24d4204SJose E. Roman 
372*d24d4204SJose E. Roman   if (A->rmap->N<=A->cmap->N) {   /* row version */
373*d24d4204SJose E. Roman 
374*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
375*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
376*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
377*d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
378*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
379*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
380*d24d4204SJose E. Roman 
381*d24d4204SJose E. Roman     /* allocate 2d vector */
382*d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383*d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
384*d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
385*d24d4204SJose E. Roman 
386*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
387*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
388*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
389*d24d4204SJose E. Roman 
390*d24d4204SJose E. Roman     /* collect diagonal */
391*d24d4204SJose E. Roman     for (j=1;j<=a->M;j++) {
392*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394*d24d4204SJose E. Roman     }
395*d24d4204SJose E. Roman 
396*d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
397*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398*d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
399*d24d4204SJose E. Roman 
400*d24d4204SJose E. Roman   } else {   /* column version */
401*d24d4204SJose E. Roman 
402*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
403*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
404*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
405*d24d4204SJose E. Roman     dlld = 1;
406*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
407*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
408*d24d4204SJose E. Roman 
409*d24d4204SJose E. Roman     /* allocate 2d vector */
410*d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411*d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
412*d24d4204SJose E. Roman 
413*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
414*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
415*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
416*d24d4204SJose E. Roman 
417*d24d4204SJose E. Roman     /* collect diagonal */
418*d24d4204SJose E. Roman     for (j=1;j<=a->N;j++) {
419*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421*d24d4204SJose E. Roman     }
422*d24d4204SJose E. Roman 
423*d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
424*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425*d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
426*d24d4204SJose E. Roman   }
427*d24d4204SJose E. Roman 
428*d24d4204SJose E. Roman   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
429*d24d4204SJose E. Roman   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
430*d24d4204SJose E. Roman   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
431*d24d4204SJose E. Roman   PetscFunctionReturn(0);
432*d24d4204SJose E. Roman }
433*d24d4204SJose E. Roman 
434*d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435*d24d4204SJose E. Roman {
436*d24d4204SJose E. Roman   PetscErrorCode    ierr;
437*d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
438*d24d4204SJose E. Roman   const PetscScalar *d;
439*d24d4204SJose E. Roman   const PetscInt    *ranges;
440*d24d4204SJose E. Roman   PetscScalar       *d2d;
441*d24d4204SJose E. Roman   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
442*d24d4204SJose E. Roman 
443*d24d4204SJose E. Roman   PetscFunctionBegin;
444*d24d4204SJose E. Roman   if (R) {
445*d24d4204SJose E. Roman     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
446*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
447*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
448*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
449*d24d4204SJose E. Roman     dlld = 1;
450*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
451*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
452*d24d4204SJose E. Roman 
453*d24d4204SJose E. Roman     /* allocate 2d vector */
454*d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
455*d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
456*d24d4204SJose E. Roman 
457*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
458*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
459*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
460*d24d4204SJose E. Roman 
461*d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
462*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
463*d24d4204SJose E. Roman 
464*d24d4204SJose E. Roman     /* broadcast along process columns */
465*d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
466*d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
467*d24d4204SJose E. Roman 
468*d24d4204SJose E. Roman     /* local scaling */
469*d24d4204SJose E. Roman     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
470*d24d4204SJose E. Roman 
471*d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
472*d24d4204SJose E. Roman     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
473*d24d4204SJose E. Roman   }
474*d24d4204SJose E. Roman   if (L) {
475*d24d4204SJose E. Roman     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
476*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
477*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
478*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
479*d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
480*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
481*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
482*d24d4204SJose E. Roman 
483*d24d4204SJose E. Roman     /* allocate 2d vector */
484*d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
485*d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
486*d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
487*d24d4204SJose E. Roman 
488*d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
489*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
490*d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
491*d24d4204SJose E. Roman 
492*d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
493*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
494*d24d4204SJose E. Roman 
495*d24d4204SJose E. Roman     /* broadcast along process rows */
496*d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
497*d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
498*d24d4204SJose E. Roman 
499*d24d4204SJose E. Roman     /* local scaling */
500*d24d4204SJose E. Roman     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
501*d24d4204SJose E. Roman 
502*d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
503*d24d4204SJose E. Roman     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
504*d24d4204SJose E. Roman   }
505*d24d4204SJose E. Roman   PetscFunctionReturn(0);
506*d24d4204SJose E. Roman }
507*d24d4204SJose E. Roman 
508*d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509*d24d4204SJose E. Roman {
510*d24d4204SJose E. Roman   PetscFunctionBegin;
511*d24d4204SJose E. Roman   *missing = PETSC_FALSE;
512*d24d4204SJose E. Roman   PetscFunctionReturn(0);
513*d24d4204SJose E. Roman }
514*d24d4204SJose E. Roman 
515*d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516*d24d4204SJose E. Roman {
517*d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518*d24d4204SJose E. Roman   PetscBLASInt  n,one=1;
519*d24d4204SJose E. Roman 
520*d24d4204SJose E. Roman   PetscFunctionBegin;
521*d24d4204SJose E. Roman   n = x->lld*x->locc;
522*d24d4204SJose E. Roman   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523*d24d4204SJose E. Roman   PetscFunctionReturn(0);
524*d24d4204SJose E. Roman }
525*d24d4204SJose E. Roman 
526*d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527*d24d4204SJose E. Roman {
528*d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529*d24d4204SJose E. Roman   PetscBLASInt  i,n;
530*d24d4204SJose E. Roman   PetscScalar   v;
531*d24d4204SJose E. Roman 
532*d24d4204SJose E. Roman   PetscFunctionBegin;
533*d24d4204SJose E. Roman   n = PetscMin(x->M,x->N);
534*d24d4204SJose E. Roman   for (i=1;i<=n;i++) {
535*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536*d24d4204SJose E. Roman     v += alpha;
537*d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538*d24d4204SJose E. Roman   }
539*d24d4204SJose E. Roman   PetscFunctionReturn(0);
540*d24d4204SJose E. Roman }
541*d24d4204SJose E. Roman 
542*d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543*d24d4204SJose E. Roman {
544*d24d4204SJose E. Roman   PetscErrorCode ierr;
545*d24d4204SJose E. Roman   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
546*d24d4204SJose E. Roman   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
547*d24d4204SJose E. Roman   PetscBLASInt   one=1;
548*d24d4204SJose E. Roman   PetscScalar    beta=1.0;
549*d24d4204SJose E. Roman 
550*d24d4204SJose E. Roman   PetscFunctionBegin;
551*d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y,1,X,3);
552*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553*d24d4204SJose E. Roman   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
554*d24d4204SJose E. Roman   PetscFunctionReturn(0);
555*d24d4204SJose E. Roman }
556*d24d4204SJose E. Roman 
557*d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558*d24d4204SJose E. Roman {
559*d24d4204SJose E. Roman   PetscErrorCode ierr;
560*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561*d24d4204SJose E. Roman   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
562*d24d4204SJose E. Roman 
563*d24d4204SJose E. Roman   PetscFunctionBegin;
564*d24d4204SJose E. Roman   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565*d24d4204SJose E. Roman   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566*d24d4204SJose E. Roman   PetscFunctionReturn(0);
567*d24d4204SJose E. Roman }
568*d24d4204SJose E. Roman 
569*d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570*d24d4204SJose E. Roman {
571*d24d4204SJose E. Roman   Mat            Bs;
572*d24d4204SJose E. Roman   MPI_Comm       comm;
573*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
574*d24d4204SJose E. Roman   PetscErrorCode ierr;
575*d24d4204SJose E. Roman 
576*d24d4204SJose E. Roman   PetscFunctionBegin;
577*d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578*d24d4204SJose E. Roman   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579*d24d4204SJose E. Roman   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580*d24d4204SJose E. Roman   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581*d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
582*d24d4204SJose E. Roman   b->M    = a->M;
583*d24d4204SJose E. Roman   b->N    = a->N;
584*d24d4204SJose E. Roman   b->mb   = a->mb;
585*d24d4204SJose E. Roman   b->nb   = a->nb;
586*d24d4204SJose E. Roman   b->rsrc = a->rsrc;
587*d24d4204SJose E. Roman   b->csrc = a->csrc;
588*d24d4204SJose E. Roman   ierr = MatSetUp(Bs);CHKERRQ(ierr);
589*d24d4204SJose E. Roman   *B = Bs;
590*d24d4204SJose E. Roman   if (op == MAT_COPY_VALUES) {
591*d24d4204SJose E. Roman     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592*d24d4204SJose E. Roman   }
593*d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
594*d24d4204SJose E. Roman   PetscFunctionReturn(0);
595*d24d4204SJose E. Roman }
596*d24d4204SJose E. Roman 
597*d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
598*d24d4204SJose E. Roman {
599*d24d4204SJose E. Roman   PetscErrorCode ierr;
600*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
601*d24d4204SJose E. Roman   Mat            Bs = *B;
602*d24d4204SJose E. Roman   PetscBLASInt   one=1;
603*d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
604*d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
605*d24d4204SJose E. Roman   PetscInt       i,j;
606*d24d4204SJose E. Roman #endif
607*d24d4204SJose E. Roman 
608*d24d4204SJose E. Roman   PetscFunctionBegin;
609*d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
610*d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
611*d24d4204SJose E. Roman     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
612*d24d4204SJose E. Roman     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
613*d24d4204SJose E. Roman     ierr = MatSetUp(Bs);CHKERRQ(ierr);
614*d24d4204SJose E. Roman     *B = Bs;
615*d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
616*d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
617*d24d4204SJose E. Roman   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
618*d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
619*d24d4204SJose E. Roman   /* undo conjugation */
620*d24d4204SJose E. Roman   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
621*d24d4204SJose E. Roman #endif
622*d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
623*d24d4204SJose E. Roman   PetscFunctionReturn(0);
624*d24d4204SJose E. Roman }
625*d24d4204SJose E. Roman 
626*d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627*d24d4204SJose E. Roman {
628*d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629*d24d4204SJose E. Roman   PetscInt      i,j;
630*d24d4204SJose E. Roman 
631*d24d4204SJose E. Roman   PetscFunctionBegin;
632*d24d4204SJose E. Roman   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633*d24d4204SJose E. Roman   PetscFunctionReturn(0);
634*d24d4204SJose E. Roman }
635*d24d4204SJose E. Roman 
636*d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637*d24d4204SJose E. Roman {
638*d24d4204SJose E. Roman   PetscErrorCode ierr;
639*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
640*d24d4204SJose E. Roman   Mat            Bs = *B;
641*d24d4204SJose E. Roman   PetscBLASInt   one=1;
642*d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
643*d24d4204SJose E. Roman 
644*d24d4204SJose E. Roman   PetscFunctionBegin;
645*d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
646*d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
647*d24d4204SJose E. Roman     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
648*d24d4204SJose E. Roman     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
649*d24d4204SJose E. Roman     ierr = MatSetUp(Bs);CHKERRQ(ierr);
650*d24d4204SJose E. Roman     *B = Bs;
651*d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652*d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
653*d24d4204SJose E. Roman   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654*d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
655*d24d4204SJose E. Roman   PetscFunctionReturn(0);
656*d24d4204SJose E. Roman }
657*d24d4204SJose E. Roman 
658*d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
659*d24d4204SJose E. Roman {
660*d24d4204SJose E. Roman   PetscErrorCode ierr;
661*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
662*d24d4204SJose E. Roman   PetscScalar    *x,*x2d;
663*d24d4204SJose E. Roman   const PetscInt *ranges;
664*d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
665*d24d4204SJose E. Roman 
666*d24d4204SJose E. Roman   PetscFunctionBegin;
667*d24d4204SJose E. Roman   ierr = VecCopy(B,X);CHKERRQ(ierr);
668*d24d4204SJose E. Roman   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
669*d24d4204SJose E. Roman 
670*d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
671*d24d4204SJose E. Roman   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
672*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
673*d24d4204SJose E. Roman   xlld = PetscMax(1,A->rmap->n);
674*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
675*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
676*d24d4204SJose E. Roman 
677*d24d4204SJose E. Roman   /* allocate 2d vector */
678*d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
679*d24d4204SJose E. Roman   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
680*d24d4204SJose E. Roman   xlld = PetscMax(1,lszx);
681*d24d4204SJose E. Roman 
682*d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
683*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
684*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
685*d24d4204SJose E. Roman 
686*d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
687*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
688*d24d4204SJose E. Roman 
689*d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
690*d24d4204SJose E. Roman   switch (A->factortype) {
691*d24d4204SJose E. Roman     case MAT_FACTOR_LU:
692*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
693*d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
694*d24d4204SJose E. Roman       break;
695*d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
696*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
697*d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
698*d24d4204SJose E. Roman       break;
699*d24d4204SJose E. Roman     default:
700*d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
701*d24d4204SJose E. Roman       break;
702*d24d4204SJose E. Roman   }
703*d24d4204SJose E. Roman 
704*d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
705*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
706*d24d4204SJose E. Roman 
707*d24d4204SJose E. Roman   ierr = PetscFree(x2d);CHKERRQ(ierr);
708*d24d4204SJose E. Roman   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
709*d24d4204SJose E. Roman   PetscFunctionReturn(0);
710*d24d4204SJose E. Roman }
711*d24d4204SJose E. Roman 
712*d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713*d24d4204SJose E. Roman {
714*d24d4204SJose E. Roman   PetscErrorCode ierr;
715*d24d4204SJose E. Roman 
716*d24d4204SJose E. Roman   PetscFunctionBegin;
717*d24d4204SJose E. Roman   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
718*d24d4204SJose E. Roman   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
719*d24d4204SJose E. Roman   PetscFunctionReturn(0);
720*d24d4204SJose E. Roman }
721*d24d4204SJose E. Roman 
722*d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
723*d24d4204SJose E. Roman {
724*d24d4204SJose E. Roman   PetscErrorCode ierr;
725*d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
726*d24d4204SJose E. Roman   PetscBool      flg1,flg2;
727*d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
728*d24d4204SJose E. Roman 
729*d24d4204SJose E. Roman   PetscFunctionBegin;
730*d24d4204SJose E. Roman   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
731*d24d4204SJose E. Roman   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
732*d24d4204SJose E. Roman   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
733*d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B,1,X,2);
734*d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)B->data;
735*d24d4204SJose E. Roman   x = (Mat_ScaLAPACK*)X->data;
736*d24d4204SJose E. Roman   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
737*d24d4204SJose E. Roman 
738*d24d4204SJose E. Roman   switch (A->factortype) {
739*d24d4204SJose E. Roman     case MAT_FACTOR_LU:
740*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
741*d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
742*d24d4204SJose E. Roman       break;
743*d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
744*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
745*d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
746*d24d4204SJose E. Roman       break;
747*d24d4204SJose E. Roman     default:
748*d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
749*d24d4204SJose E. Roman       break;
750*d24d4204SJose E. Roman   }
751*d24d4204SJose E. Roman   PetscFunctionReturn(0);
752*d24d4204SJose E. Roman }
753*d24d4204SJose E. Roman 
754*d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755*d24d4204SJose E. Roman {
756*d24d4204SJose E. Roman   PetscErrorCode ierr;
757*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758*d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
759*d24d4204SJose E. Roman 
760*d24d4204SJose E. Roman   PetscFunctionBegin;
761*d24d4204SJose E. Roman   if (!a->pivots) {
762*d24d4204SJose E. Roman     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
763*d24d4204SJose E. Roman     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
764*d24d4204SJose E. Roman   }
765*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf",info);
767*d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
768*d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
769*d24d4204SJose E. Roman 
770*d24d4204SJose E. Roman   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
771*d24d4204SJose E. Roman   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
772*d24d4204SJose E. Roman   PetscFunctionReturn(0);
773*d24d4204SJose E. Roman }
774*d24d4204SJose E. Roman 
775*d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776*d24d4204SJose E. Roman {
777*d24d4204SJose E. Roman   PetscErrorCode ierr;
778*d24d4204SJose E. Roman 
779*d24d4204SJose E. Roman   PetscFunctionBegin;
780*d24d4204SJose E. Roman   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781*d24d4204SJose E. Roman   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
782*d24d4204SJose E. Roman   PetscFunctionReturn(0);
783*d24d4204SJose E. Roman }
784*d24d4204SJose E. Roman 
785*d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786*d24d4204SJose E. Roman {
787*d24d4204SJose E. Roman   PetscFunctionBegin;
788*d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789*d24d4204SJose E. Roman   PetscFunctionReturn(0);
790*d24d4204SJose E. Roman }
791*d24d4204SJose E. Roman 
792*d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793*d24d4204SJose E. Roman {
794*d24d4204SJose E. Roman   PetscErrorCode ierr;
795*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
796*d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
797*d24d4204SJose E. Roman 
798*d24d4204SJose E. Roman   PetscFunctionBegin;
799*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
800*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf",info);
801*d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
802*d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
803*d24d4204SJose E. Roman 
804*d24d4204SJose E. Roman   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
805*d24d4204SJose E. Roman   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
806*d24d4204SJose E. Roman   PetscFunctionReturn(0);
807*d24d4204SJose E. Roman }
808*d24d4204SJose E. Roman 
809*d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810*d24d4204SJose E. Roman {
811*d24d4204SJose E. Roman   PetscErrorCode ierr;
812*d24d4204SJose E. Roman 
813*d24d4204SJose E. Roman   PetscFunctionBegin;
814*d24d4204SJose E. Roman   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815*d24d4204SJose E. Roman   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
816*d24d4204SJose E. Roman   PetscFunctionReturn(0);
817*d24d4204SJose E. Roman }
818*d24d4204SJose E. Roman 
819*d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820*d24d4204SJose E. Roman {
821*d24d4204SJose E. Roman   PetscFunctionBegin;
822*d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
823*d24d4204SJose E. Roman   PetscFunctionReturn(0);
824*d24d4204SJose E. Roman }
825*d24d4204SJose E. Roman 
826*d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827*d24d4204SJose E. Roman {
828*d24d4204SJose E. Roman   PetscFunctionBegin;
829*d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
830*d24d4204SJose E. Roman   PetscFunctionReturn(0);
831*d24d4204SJose E. Roman }
832*d24d4204SJose E. Roman 
833*d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834*d24d4204SJose E. Roman {
835*d24d4204SJose E. Roman   Mat            B;
836*d24d4204SJose E. Roman   PetscErrorCode ierr;
837*d24d4204SJose E. Roman 
838*d24d4204SJose E. Roman   PetscFunctionBegin;
839*d24d4204SJose E. Roman   /* Create the factorization matrix */
840*d24d4204SJose E. Roman   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
841*d24d4204SJose E. Roman   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
842*d24d4204SJose E. Roman   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
843*d24d4204SJose E. Roman   ierr = MatSetUp(B);CHKERRQ(ierr);
844*d24d4204SJose E. Roman   B->factortype = ftype;
845*d24d4204SJose E. Roman   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
846*d24d4204SJose E. Roman   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
847*d24d4204SJose E. Roman 
848*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
849*d24d4204SJose E. Roman   *F = B;
850*d24d4204SJose E. Roman   PetscFunctionReturn(0);
851*d24d4204SJose E. Roman }
852*d24d4204SJose E. Roman 
853*d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854*d24d4204SJose E. Roman {
855*d24d4204SJose E. Roman   PetscErrorCode ierr;
856*d24d4204SJose E. Roman 
857*d24d4204SJose E. Roman   PetscFunctionBegin;
858*d24d4204SJose E. Roman   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859*d24d4204SJose E. Roman   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860*d24d4204SJose E. Roman   PetscFunctionReturn(0);
861*d24d4204SJose E. Roman }
862*d24d4204SJose E. Roman 
863*d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864*d24d4204SJose E. Roman {
865*d24d4204SJose E. Roman   PetscErrorCode ierr;
866*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
867*d24d4204SJose E. Roman   PetscBLASInt   one=1,lwork=0;
868*d24d4204SJose E. Roman   const char     *ntype;
869*d24d4204SJose E. Roman   PetscScalar    *work,dummy;
870*d24d4204SJose E. Roman 
871*d24d4204SJose E. Roman   PetscFunctionBegin;
872*d24d4204SJose E. Roman   switch (type){
873*d24d4204SJose E. Roman     case NORM_1:
874*d24d4204SJose E. Roman       ntype = "1";
875*d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
876*d24d4204SJose E. Roman       break;
877*d24d4204SJose E. Roman     case NORM_FROBENIUS:
878*d24d4204SJose E. Roman       ntype = "F";
879*d24d4204SJose E. Roman       work  = &dummy;
880*d24d4204SJose E. Roman       break;
881*d24d4204SJose E. Roman     case NORM_INFINITY:
882*d24d4204SJose E. Roman       ntype = "I";
883*d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
884*d24d4204SJose E. Roman       break;
885*d24d4204SJose E. Roman     default:
886*d24d4204SJose E. Roman       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887*d24d4204SJose E. Roman   }
888*d24d4204SJose E. Roman   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
889*d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890*d24d4204SJose E. Roman   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
891*d24d4204SJose E. Roman   PetscFunctionReturn(0);
892*d24d4204SJose E. Roman }
893*d24d4204SJose E. Roman 
894*d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895*d24d4204SJose E. Roman {
896*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
897*d24d4204SJose E. Roman   PetscErrorCode ierr;
898*d24d4204SJose E. Roman 
899*d24d4204SJose E. Roman   PetscFunctionBegin;
900*d24d4204SJose E. Roman   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901*d24d4204SJose E. Roman   PetscFunctionReturn(0);
902*d24d4204SJose E. Roman }
903*d24d4204SJose E. Roman 
904*d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905*d24d4204SJose E. Roman {
906*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907*d24d4204SJose E. Roman   PetscErrorCode ierr;
908*d24d4204SJose E. Roman   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
909*d24d4204SJose E. Roman 
910*d24d4204SJose E. Roman   PetscFunctionBegin;
911*d24d4204SJose E. Roman   if (rows) {
912*d24d4204SJose E. Roman     n     = a->locr;
913*d24d4204SJose E. Roman     nb    = a->mb;
914*d24d4204SJose E. Roman     isrc  = a->rsrc;
915*d24d4204SJose E. Roman     nproc = a->grid->nprow;
916*d24d4204SJose E. Roman     iproc = a->grid->myrow;
917*d24d4204SJose E. Roman     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
918*d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919*d24d4204SJose E. Roman     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
920*d24d4204SJose E. Roman   }
921*d24d4204SJose E. Roman   if (cols) {
922*d24d4204SJose E. Roman     n     = a->locc;
923*d24d4204SJose E. Roman     nb    = a->nb;
924*d24d4204SJose E. Roman     isrc  = a->csrc;
925*d24d4204SJose E. Roman     nproc = a->grid->npcol;
926*d24d4204SJose E. Roman     iproc = a->grid->mycol;
927*d24d4204SJose E. Roman     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928*d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929*d24d4204SJose E. Roman     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
930*d24d4204SJose E. Roman   }
931*d24d4204SJose E. Roman   PetscFunctionReturn(0);
932*d24d4204SJose E. Roman }
933*d24d4204SJose E. Roman 
934*d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
935*d24d4204SJose E. Roman {
936*d24d4204SJose E. Roman   PetscErrorCode ierr;
937*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
938*d24d4204SJose E. Roman   Mat            Bmpi;
939*d24d4204SJose E. Roman   MPI_Comm       comm;
940*d24d4204SJose E. Roman   const PetscInt *ranges;
941*d24d4204SJose E. Roman   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
942*d24d4204SJose E. Roman   PetscScalar    *barray;
943*d24d4204SJose E. Roman 
944*d24d4204SJose E. Roman   PetscFunctionBegin;
945*d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
946*d24d4204SJose E. Roman 
947*d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
948*d24d4204SJose E. Roman   else {
949*d24d4204SJose E. Roman     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
950*d24d4204SJose E. Roman     ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
951*d24d4204SJose E. Roman     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
952*d24d4204SJose E. Roman     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
953*d24d4204SJose E. Roman   }
954*d24d4204SJose E. Roman 
955*d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for B (1d block distribution) */
956*d24d4204SJose E. Roman   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
957*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
958*d24d4204SJose E. Roman   lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
959*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
960*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
961*d24d4204SJose E. Roman 
962*d24d4204SJose E. Roman   /* redistribute matrix */
963*d24d4204SJose E. Roman   ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
964*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
965*d24d4204SJose E. Roman   ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
966*d24d4204SJose E. Roman 
967*d24d4204SJose E. Roman   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968*d24d4204SJose E. Roman   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969*d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
970*d24d4204SJose E. Roman     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
971*d24d4204SJose E. Roman   } else *B = Bmpi;
972*d24d4204SJose E. Roman   PetscFunctionReturn(0);
973*d24d4204SJose E. Roman }
974*d24d4204SJose E. Roman 
975*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
976*d24d4204SJose E. Roman {
977*d24d4204SJose E. Roman   PetscErrorCode ierr;
978*d24d4204SJose E. Roman   Mat_ScaLAPACK  *b;
979*d24d4204SJose E. Roman   Mat            Bmpi;
980*d24d4204SJose E. Roman   MPI_Comm       comm;
981*d24d4204SJose E. Roman   const PetscInt *ranges;
982*d24d4204SJose E. Roman   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
983*d24d4204SJose E. Roman   PetscScalar    *aarray;
984*d24d4204SJose E. Roman 
985*d24d4204SJose E. Roman   PetscFunctionBegin;
986*d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
987*d24d4204SJose E. Roman 
988*d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
989*d24d4204SJose E. Roman   else {
990*d24d4204SJose E. Roman     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
991*d24d4204SJose E. Roman     ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
992*d24d4204SJose E. Roman     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
993*d24d4204SJose E. Roman     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
994*d24d4204SJose E. Roman   }
995*d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bmpi->data;
996*d24d4204SJose E. Roman 
997*d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for A (1d block distribution) */
998*d24d4204SJose E. Roman   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
999*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
1000*d24d4204SJose E. Roman   lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1001*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1002*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1003*d24d4204SJose E. Roman 
1004*d24d4204SJose E. Roman   /* redistribute matrix */
1005*d24d4204SJose E. Roman   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1006*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1007*d24d4204SJose E. Roman   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1008*d24d4204SJose E. Roman 
1009*d24d4204SJose E. Roman   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010*d24d4204SJose E. Roman   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1011*d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
1012*d24d4204SJose E. Roman     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1013*d24d4204SJose E. Roman   } else *B = Bmpi;
1014*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1015*d24d4204SJose E. Roman }
1016*d24d4204SJose E. Roman 
1017*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1018*d24d4204SJose E. Roman {
1019*d24d4204SJose E. Roman   Mat               mat_scal;
1020*d24d4204SJose E. Roman   PetscErrorCode    ierr;
1021*d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1022*d24d4204SJose E. Roman   const PetscInt    *cols;
1023*d24d4204SJose E. Roman   const PetscScalar *vals;
1024*d24d4204SJose E. Roman 
1025*d24d4204SJose E. Roman   PetscFunctionBegin;
1026*d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1027*d24d4204SJose E. Roman     mat_scal = *newmat;
1028*d24d4204SJose E. Roman     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1029*d24d4204SJose E. Roman   } else {
1030*d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1031*d24d4204SJose E. Roman     m = PETSC_DECIDE;
1032*d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1033*d24d4204SJose E. Roman     n = PETSC_DECIDE;
1034*d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1035*d24d4204SJose E. Roman     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1036*d24d4204SJose E. Roman     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1037*d24d4204SJose E. Roman     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1038*d24d4204SJose E. Roman   }
1039*d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
1040*d24d4204SJose E. Roman     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1041*d24d4204SJose E. Roman     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1042*d24d4204SJose E. Roman     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1043*d24d4204SJose E. Roman   }
1044*d24d4204SJose E. Roman   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1045*d24d4204SJose E. Roman   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1046*d24d4204SJose E. Roman 
1047*d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1048*d24d4204SJose E. Roman   else *newmat = mat_scal;
1049*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1050*d24d4204SJose E. Roman }
1051*d24d4204SJose E. Roman 
1052*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1053*d24d4204SJose E. Roman {
1054*d24d4204SJose E. Roman   Mat               mat_scal;
1055*d24d4204SJose E. Roman   PetscErrorCode    ierr;
1056*d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1057*d24d4204SJose E. Roman   const PetscInt    *cols;
1058*d24d4204SJose E. Roman   const PetscScalar *vals;
1059*d24d4204SJose E. Roman   PetscScalar       v;
1060*d24d4204SJose E. Roman 
1061*d24d4204SJose E. Roman   PetscFunctionBegin;
1062*d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1063*d24d4204SJose E. Roman     mat_scal = *newmat;
1064*d24d4204SJose E. Roman     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1065*d24d4204SJose E. Roman   } else {
1066*d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1067*d24d4204SJose E. Roman     m = PETSC_DECIDE;
1068*d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1069*d24d4204SJose E. Roman     n = PETSC_DECIDE;
1070*d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1071*d24d4204SJose E. Roman     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1072*d24d4204SJose E. Roman     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1073*d24d4204SJose E. Roman     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1074*d24d4204SJose E. Roman   }
1075*d24d4204SJose E. Roman   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1076*d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
1077*d24d4204SJose E. Roman     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1078*d24d4204SJose E. Roman     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1079*d24d4204SJose E. Roman     for (j=0;j<ncols;j++) { /* lower triangular part */
1080*d24d4204SJose E. Roman       if (cols[j] == row) continue;
1081*d24d4204SJose E. Roman       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1082*d24d4204SJose E. Roman       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1083*d24d4204SJose E. Roman     }
1084*d24d4204SJose E. Roman     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1085*d24d4204SJose E. Roman   }
1086*d24d4204SJose E. Roman   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1087*d24d4204SJose E. Roman   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1088*d24d4204SJose E. Roman   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1089*d24d4204SJose E. Roman 
1090*d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1091*d24d4204SJose E. Roman   else *newmat = mat_scal;
1092*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1093*d24d4204SJose E. Roman }
1094*d24d4204SJose E. Roman 
1095*d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1096*d24d4204SJose E. Roman {
1097*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1098*d24d4204SJose E. Roman   PetscErrorCode ierr;
1099*d24d4204SJose E. Roman   PetscInt       sz=0;
1100*d24d4204SJose E. Roman 
1101*d24d4204SJose E. Roman   PetscFunctionBegin;
1102*d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1103*d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1104*d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1105*d24d4204SJose E. Roman 
1106*d24d4204SJose E. Roman   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1107*d24d4204SJose E. Roman   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1108*d24d4204SJose E. Roman   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1109*d24d4204SJose E. Roman   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1110*d24d4204SJose E. Roman 
1111*d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
1112*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1113*d24d4204SJose E. Roman }
1114*d24d4204SJose E. Roman 
1115*d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1116*d24d4204SJose E. Roman {
1117*d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1118*d24d4204SJose E. Roman   PetscErrorCode     ierr;
1119*d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1120*d24d4204SJose E. Roman   PetscBool          flg;
1121*d24d4204SJose E. Roman   MPI_Comm           icomm;
1122*d24d4204SJose E. Roman 
1123*d24d4204SJose E. Roman   PetscFunctionBegin;
1124*d24d4204SJose E. Roman   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1125*d24d4204SJose E. Roman   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1126*d24d4204SJose E. Roman   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1127*d24d4204SJose E. Roman   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1128*d24d4204SJose E. Roman   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1129*d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1130*d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1131*d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1132*d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
1133*d24d4204SJose E. Roman     ierr = PetscFree(grid);CHKERRQ(ierr);
1134*d24d4204SJose E. Roman     ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1135*d24d4204SJose E. Roman   }
1136*d24d4204SJose E. Roman   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1137*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1138*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1139*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1140*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1141*d24d4204SJose E. Roman   ierr = PetscFree(A->data);CHKERRQ(ierr);
1142*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1143*d24d4204SJose E. Roman }
1144*d24d4204SJose E. Roman 
1145*d24d4204SJose E. Roman PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1146*d24d4204SJose E. Roman {
1147*d24d4204SJose E. Roman   PetscErrorCode ierr;
1148*d24d4204SJose E. Roman   const PetscInt *ranges;
1149*d24d4204SJose E. Roman   PetscMPIInt    size;
1150*d24d4204SJose E. Roman   PetscInt       i,n;
1151*d24d4204SJose E. Roman 
1152*d24d4204SJose E. Roman   PetscFunctionBegin;
1153*d24d4204SJose E. Roman   ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
1154*d24d4204SJose E. Roman   if (size>2) {
1155*d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1156*d24d4204SJose E. Roman     n = ranges[1]-ranges[0];
1157*d24d4204SJose E. Roman     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1158*d24d4204SJose E. Roman     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1159*d24d4204SJose E. Roman   }
1160*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1161*d24d4204SJose E. Roman }
1162*d24d4204SJose E. Roman 
1163*d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1164*d24d4204SJose E. Roman {
1165*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1166*d24d4204SJose E. Roman   PetscErrorCode ierr;
1167*d24d4204SJose E. Roman   PetscBLASInt   info=0;
1168*d24d4204SJose E. Roman 
1169*d24d4204SJose E. Roman   PetscFunctionBegin;
1170*d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1171*d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1172*d24d4204SJose E. Roman 
1173*d24d4204SJose E. Roman   /* check that the layout is as enforced by MatCreateScaLAPACK */
1174*d24d4204SJose E. Roman   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1175*d24d4204SJose E. Roman   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1176*d24d4204SJose E. Roman 
1177*d24d4204SJose E. Roman   /* compute local sizes */
1178*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1179*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1180*d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1181*d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1182*d24d4204SJose E. Roman   a->lld  = PetscMax(1,a->locr);
1183*d24d4204SJose E. Roman 
1184*d24d4204SJose E. Roman   /* allocate local array */
1185*d24d4204SJose E. Roman   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1186*d24d4204SJose E. Roman 
1187*d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1188*d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1189*d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1190*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1191*d24d4204SJose E. Roman }
1192*d24d4204SJose E. Roman 
1193*d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1194*d24d4204SJose E. Roman {
1195*d24d4204SJose E. Roman   PetscErrorCode ierr;
1196*d24d4204SJose E. Roman   PetscInt       nstash,reallocs;
1197*d24d4204SJose E. Roman 
1198*d24d4204SJose E. Roman   PetscFunctionBegin;
1199*d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1200*d24d4204SJose E. Roman   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1201*d24d4204SJose E. Roman   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1202*d24d4204SJose E. Roman   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1203*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1204*d24d4204SJose E. Roman }
1205*d24d4204SJose E. Roman 
1206*d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1207*d24d4204SJose E. Roman {
1208*d24d4204SJose E. Roman   PetscErrorCode ierr;
1209*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1210*d24d4204SJose E. Roman   PetscMPIInt    n;
1211*d24d4204SJose E. Roman   PetscInt       i,flg,*row,*col;
1212*d24d4204SJose E. Roman   PetscScalar    *val;
1213*d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1214*d24d4204SJose E. Roman 
1215*d24d4204SJose E. Roman   PetscFunctionBegin;
1216*d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1217*d24d4204SJose E. Roman   while (1) {
1218*d24d4204SJose E. Roman     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1219*d24d4204SJose E. Roman     if (!flg) break;
1220*d24d4204SJose E. Roman     for (i=0;i<n;i++) {
1221*d24d4204SJose E. Roman       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1222*d24d4204SJose E. Roman       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1223*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1224*d24d4204SJose E. Roman       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1225*d24d4204SJose E. Roman       switch (A->insertmode) {
1226*d24d4204SJose E. Roman         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1227*d24d4204SJose E. Roman         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1228*d24d4204SJose E. Roman         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1229*d24d4204SJose E. Roman       }
1230*d24d4204SJose E. Roman     }
1231*d24d4204SJose E. Roman   }
1232*d24d4204SJose E. Roman   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1233*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1234*d24d4204SJose E. Roman }
1235*d24d4204SJose E. Roman 
1236*d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1237*d24d4204SJose E. Roman {
1238*d24d4204SJose E. Roman   PetscErrorCode ierr;
1239*d24d4204SJose E. Roman   Mat            Adense,As;
1240*d24d4204SJose E. Roman   MPI_Comm       comm;
1241*d24d4204SJose E. Roman 
1242*d24d4204SJose E. Roman   PetscFunctionBegin;
1243*d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1244*d24d4204SJose E. Roman   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1245*d24d4204SJose E. Roman   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1246*d24d4204SJose E. Roman   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1247*d24d4204SJose E. Roman   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1248*d24d4204SJose E. Roman   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1249*d24d4204SJose E. Roman   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1250*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1251*d24d4204SJose E. Roman }
1252*d24d4204SJose E. Roman 
1253*d24d4204SJose E. Roman /* -------------------------------------------------------------------*/
1254*d24d4204SJose E. Roman static struct _MatOps MatOps_Values = {
1255*d24d4204SJose E. Roman        MatSetValues_ScaLAPACK,
1256*d24d4204SJose E. Roman        0,
1257*d24d4204SJose E. Roman        0,
1258*d24d4204SJose E. Roman        MatMult_ScaLAPACK,
1259*d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK,
1260*d24d4204SJose E. Roman        MatMultTranspose_ScaLAPACK,
1261*d24d4204SJose E. Roman        MatMultTransposeAdd_ScaLAPACK,
1262*d24d4204SJose E. Roman        MatSolve_ScaLAPACK,
1263*d24d4204SJose E. Roman        MatSolveAdd_ScaLAPACK,
1264*d24d4204SJose E. Roman        0,
1265*d24d4204SJose E. Roman /*10*/ 0,
1266*d24d4204SJose E. Roman        MatLUFactor_ScaLAPACK,
1267*d24d4204SJose E. Roman        MatCholeskyFactor_ScaLAPACK,
1268*d24d4204SJose E. Roman        0,
1269*d24d4204SJose E. Roman        MatTranspose_ScaLAPACK,
1270*d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK,
1271*d24d4204SJose E. Roman        0,
1272*d24d4204SJose E. Roman        MatGetDiagonal_ScaLAPACK,
1273*d24d4204SJose E. Roman        MatDiagonalScale_ScaLAPACK,
1274*d24d4204SJose E. Roman        MatNorm_ScaLAPACK,
1275*d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK,
1276*d24d4204SJose E. Roman        MatAssemblyEnd_ScaLAPACK,
1277*d24d4204SJose E. Roman        MatSetOption_ScaLAPACK,
1278*d24d4204SJose E. Roman        MatZeroEntries_ScaLAPACK,
1279*d24d4204SJose E. Roman /*24*/ 0,
1280*d24d4204SJose E. Roman        MatLUFactorSymbolic_ScaLAPACK,
1281*d24d4204SJose E. Roman        MatLUFactorNumeric_ScaLAPACK,
1282*d24d4204SJose E. Roman        MatCholeskyFactorSymbolic_ScaLAPACK,
1283*d24d4204SJose E. Roman        MatCholeskyFactorNumeric_ScaLAPACK,
1284*d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK,
1285*d24d4204SJose E. Roman        0,
1286*d24d4204SJose E. Roman        0,
1287*d24d4204SJose E. Roman        0,
1288*d24d4204SJose E. Roman        0,
1289*d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK,
1290*d24d4204SJose E. Roman        0,
1291*d24d4204SJose E. Roman        0,
1292*d24d4204SJose E. Roman        0,
1293*d24d4204SJose E. Roman        0,
1294*d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK,
1295*d24d4204SJose E. Roman        0,
1296*d24d4204SJose E. Roman        0,
1297*d24d4204SJose E. Roman        0,
1298*d24d4204SJose E. Roman        MatCopy_ScaLAPACK,
1299*d24d4204SJose E. Roman /*44*/ 0,
1300*d24d4204SJose E. Roman        MatScale_ScaLAPACK,
1301*d24d4204SJose E. Roman        MatShift_ScaLAPACK,
1302*d24d4204SJose E. Roman        0,
1303*d24d4204SJose E. Roman        0,
1304*d24d4204SJose E. Roman /*49*/ 0,
1305*d24d4204SJose E. Roman        0,
1306*d24d4204SJose E. Roman        0,
1307*d24d4204SJose E. Roman        0,
1308*d24d4204SJose E. Roman        0,
1309*d24d4204SJose E. Roman /*54*/ 0,
1310*d24d4204SJose E. Roman        0,
1311*d24d4204SJose E. Roman        0,
1312*d24d4204SJose E. Roman        0,
1313*d24d4204SJose E. Roman        0,
1314*d24d4204SJose E. Roman /*59*/ 0,
1315*d24d4204SJose E. Roman        MatDestroy_ScaLAPACK,
1316*d24d4204SJose E. Roman        MatView_ScaLAPACK,
1317*d24d4204SJose E. Roman        0,
1318*d24d4204SJose E. Roman        0,
1319*d24d4204SJose E. Roman /*64*/ 0,
1320*d24d4204SJose E. Roman        0,
1321*d24d4204SJose E. Roman        0,
1322*d24d4204SJose E. Roman        0,
1323*d24d4204SJose E. Roman        0,
1324*d24d4204SJose E. Roman /*69*/ 0,
1325*d24d4204SJose E. Roman        0,
1326*d24d4204SJose E. Roman        MatConvert_ScaLAPACK_Dense,
1327*d24d4204SJose E. Roman        0,
1328*d24d4204SJose E. Roman        0,
1329*d24d4204SJose E. Roman /*74*/ 0,
1330*d24d4204SJose E. Roman        0,
1331*d24d4204SJose E. Roman        0,
1332*d24d4204SJose E. Roman        0,
1333*d24d4204SJose E. Roman        0,
1334*d24d4204SJose E. Roman /*79*/ 0,
1335*d24d4204SJose E. Roman        0,
1336*d24d4204SJose E. Roman        0,
1337*d24d4204SJose E. Roman        0,
1338*d24d4204SJose E. Roman        MatLoad_ScaLAPACK,
1339*d24d4204SJose E. Roman /*84*/ 0,
1340*d24d4204SJose E. Roman        0,
1341*d24d4204SJose E. Roman        0,
1342*d24d4204SJose E. Roman        0,
1343*d24d4204SJose E. Roman        0,
1344*d24d4204SJose E. Roman /*89*/ 0,
1345*d24d4204SJose E. Roman        0,
1346*d24d4204SJose E. Roman        MatMatMultNumeric_ScaLAPACK,
1347*d24d4204SJose E. Roman        0,
1348*d24d4204SJose E. Roman        0,
1349*d24d4204SJose E. Roman /*94*/ 0,
1350*d24d4204SJose E. Roman        0,
1351*d24d4204SJose E. Roman        0,
1352*d24d4204SJose E. Roman        MatMatTransposeMultNumeric_ScaLAPACK,
1353*d24d4204SJose E. Roman        0,
1354*d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK,
1355*d24d4204SJose E. Roman        0,
1356*d24d4204SJose E. Roman        0,
1357*d24d4204SJose E. Roman        MatConjugate_ScaLAPACK,
1358*d24d4204SJose E. Roman        0,
1359*d24d4204SJose E. Roman /*104*/0,
1360*d24d4204SJose E. Roman        0,
1361*d24d4204SJose E. Roman        0,
1362*d24d4204SJose E. Roman        0,
1363*d24d4204SJose E. Roman        0,
1364*d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK,
1365*d24d4204SJose E. Roman        0,
1366*d24d4204SJose E. Roman        0,
1367*d24d4204SJose E. Roman        0,
1368*d24d4204SJose E. Roman        MatMissingDiagonal_ScaLAPACK,
1369*d24d4204SJose E. Roman /*114*/0,
1370*d24d4204SJose E. Roman        0,
1371*d24d4204SJose E. Roman        0,
1372*d24d4204SJose E. Roman        0,
1373*d24d4204SJose E. Roman        0,
1374*d24d4204SJose E. Roman /*119*/0,
1375*d24d4204SJose E. Roman        MatHermitianTranspose_ScaLAPACK,
1376*d24d4204SJose E. Roman        0,
1377*d24d4204SJose E. Roman        0,
1378*d24d4204SJose E. Roman        0,
1379*d24d4204SJose E. Roman /*124*/0,
1380*d24d4204SJose E. Roman        0,
1381*d24d4204SJose E. Roman        0,
1382*d24d4204SJose E. Roman        0,
1383*d24d4204SJose E. Roman        0,
1384*d24d4204SJose E. Roman /*129*/0,
1385*d24d4204SJose E. Roman        0,
1386*d24d4204SJose E. Roman        0,
1387*d24d4204SJose E. Roman        0,
1388*d24d4204SJose E. Roman        0,
1389*d24d4204SJose E. Roman /*134*/0,
1390*d24d4204SJose E. Roman        0,
1391*d24d4204SJose E. Roman        0,
1392*d24d4204SJose E. Roman        0,
1393*d24d4204SJose E. Roman        0,
1394*d24d4204SJose E. Roman        0,
1395*d24d4204SJose E. Roman /*140*/0,
1396*d24d4204SJose E. Roman        0,
1397*d24d4204SJose E. Roman        0,
1398*d24d4204SJose E. Roman        0,
1399*d24d4204SJose E. Roman        0,
1400*d24d4204SJose E. Roman /*145*/0,
1401*d24d4204SJose E. Roman        0,
1402*d24d4204SJose E. Roman        0
1403*d24d4204SJose E. Roman };
1404*d24d4204SJose E. Roman 
1405*d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1406*d24d4204SJose E. Roman {
1407*d24d4204SJose E. Roman   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1408*d24d4204SJose E. Roman   PetscInt           size=stash->size,nsends;
1409*d24d4204SJose E. Roman   PetscErrorCode     ierr;
1410*d24d4204SJose E. Roman   PetscInt           count,*sindices,**rindices,i,j,l;
1411*d24d4204SJose E. Roman   PetscScalar        **rvalues,*svalues;
1412*d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1413*d24d4204SJose E. Roman   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1414*d24d4204SJose E. Roman   PetscMPIInt        *sizes,*nlengths,nreceives;
1415*d24d4204SJose E. Roman   PetscInt           *sp_idx,*sp_idy;
1416*d24d4204SJose E. Roman   PetscScalar        *sp_val;
1417*d24d4204SJose E. Roman   PetscMatStashSpace space,space_next;
1418*d24d4204SJose E. Roman   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1419*d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1420*d24d4204SJose E. Roman 
1421*d24d4204SJose E. Roman   PetscFunctionBegin;
1422*d24d4204SJose E. Roman   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1423*d24d4204SJose E. Roman     InsertMode addv;
1424*d24d4204SJose E. Roman     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425*d24d4204SJose E. Roman     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1426*d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1427*d24d4204SJose E. Roman   }
1428*d24d4204SJose E. Roman 
1429*d24d4204SJose E. Roman   bs2 = stash->bs*stash->bs;
1430*d24d4204SJose E. Roman 
1431*d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
1432*d24d4204SJose E. Roman   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1433*d24d4204SJose E. Roman   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1434*d24d4204SJose E. Roman 
1435*d24d4204SJose E. Roman   i     = j    = 0;
1436*d24d4204SJose E. Roman   space = stash->space_head;
1437*d24d4204SJose E. Roman   while (space) {
1438*d24d4204SJose E. Roman     space_next = space->next;
1439*d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
1440*d24d4204SJose E. Roman       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1441*d24d4204SJose E. Roman       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1442*d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1443*d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1444*d24d4204SJose E. Roman       nlengths[j]++; owner[i] = j;
1445*d24d4204SJose E. Roman       i++;
1446*d24d4204SJose E. Roman     }
1447*d24d4204SJose E. Roman     space = space_next;
1448*d24d4204SJose E. Roman   }
1449*d24d4204SJose E. Roman 
1450*d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
1451*d24d4204SJose E. Roman   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1452*d24d4204SJose E. Roman   for (i=0, nsends=0; i<size; i++) {
1453*d24d4204SJose E. Roman     if (nlengths[i]) {
1454*d24d4204SJose E. Roman       sizes[i] = 1; nsends++;
1455*d24d4204SJose E. Roman     }
1456*d24d4204SJose E. Roman   }
1457*d24d4204SJose E. Roman 
1458*d24d4204SJose E. Roman   {PetscMPIInt *onodes,*olengths;
1459*d24d4204SJose E. Roman    /* Determine the number of messages to expect, their lengths, from from-ids */
1460*d24d4204SJose E. Roman    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1461*d24d4204SJose E. Roman    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1462*d24d4204SJose E. Roman    /* since clubbing row,col - lengths are multiplied by 2 */
1463*d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] *=2;
1464*d24d4204SJose E. Roman    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1465*d24d4204SJose E. Roman    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1466*d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1467*d24d4204SJose E. Roman    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1468*d24d4204SJose E. Roman    ierr = PetscFree(onodes);CHKERRQ(ierr);
1469*d24d4204SJose E. Roman    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1470*d24d4204SJose E. Roman 
1471*d24d4204SJose E. Roman   /* do sends:
1472*d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1473*d24d4204SJose E. Roman          the ith processor
1474*d24d4204SJose E. Roman   */
1475*d24d4204SJose E. Roman   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1476*d24d4204SJose E. Roman   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1477*d24d4204SJose E. Roman   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1478*d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
1479*d24d4204SJose E. Roman   startv[0] = 0; starti[0] = 0;
1480*d24d4204SJose E. Roman   for (i=1; i<size; i++) {
1481*d24d4204SJose E. Roman     startv[i] = startv[i-1] + nlengths[i-1];
1482*d24d4204SJose E. Roman     starti[i] = starti[i-1] + 2*nlengths[i-1];
1483*d24d4204SJose E. Roman   }
1484*d24d4204SJose E. Roman 
1485*d24d4204SJose E. Roman   i     = 0;
1486*d24d4204SJose E. Roman   space = stash->space_head;
1487*d24d4204SJose E. Roman   while (space) {
1488*d24d4204SJose E. Roman     space_next = space->next;
1489*d24d4204SJose E. Roman     sp_idx     = space->idx;
1490*d24d4204SJose E. Roman     sp_idy     = space->idy;
1491*d24d4204SJose E. Roman     sp_val     = space->val;
1492*d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
1493*d24d4204SJose E. Roman       j = owner[i];
1494*d24d4204SJose E. Roman       if (bs2 == 1) {
1495*d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1496*d24d4204SJose E. Roman       } else {
1497*d24d4204SJose E. Roman         PetscInt    k;
1498*d24d4204SJose E. Roman         PetscScalar *buf1,*buf2;
1499*d24d4204SJose E. Roman         buf1 = svalues+bs2*startv[j];
1500*d24d4204SJose E. Roman         buf2 = space->val + bs2*l;
1501*d24d4204SJose E. Roman         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1502*d24d4204SJose E. Roman       }
1503*d24d4204SJose E. Roman       sindices[starti[j]]             = sp_idx[l];
1504*d24d4204SJose E. Roman       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1505*d24d4204SJose E. Roman       startv[j]++;
1506*d24d4204SJose E. Roman       starti[j]++;
1507*d24d4204SJose E. Roman       i++;
1508*d24d4204SJose E. Roman     }
1509*d24d4204SJose E. Roman     space = space_next;
1510*d24d4204SJose E. Roman   }
1511*d24d4204SJose E. Roman   startv[0] = 0;
1512*d24d4204SJose E. Roman   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1513*d24d4204SJose E. Roman 
1514*d24d4204SJose E. Roman   for (i=0,count=0; i<size; i++) {
1515*d24d4204SJose E. Roman     if (sizes[i]) {
1516*d24d4204SJose E. Roman       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1517*d24d4204SJose E. Roman       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1518*d24d4204SJose E. Roman     }
1519*d24d4204SJose E. Roman   }
1520*d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
1521*d24d4204SJose E. Roman   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1522*d24d4204SJose E. Roman   for (i=0; i<size; i++) {
1523*d24d4204SJose E. Roman     if (sizes[i]) {
1524*d24d4204SJose E. Roman       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1525*d24d4204SJose E. Roman     }
1526*d24d4204SJose E. Roman   }
1527*d24d4204SJose E. Roman #endif
1528*d24d4204SJose E. Roman   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1529*d24d4204SJose E. Roman   ierr = PetscFree(owner);CHKERRQ(ierr);
1530*d24d4204SJose E. Roman   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1531*d24d4204SJose E. Roman   ierr = PetscFree(sizes);CHKERRQ(ierr);
1532*d24d4204SJose E. Roman 
1533*d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1534*d24d4204SJose E. Roman   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1535*d24d4204SJose E. Roman 
1536*d24d4204SJose E. Roman   for (i=0; i<nreceives; i++) {
1537*d24d4204SJose E. Roman     recv_waits[2*i]   = recv_waits1[i];
1538*d24d4204SJose E. Roman     recv_waits[2*i+1] = recv_waits2[i];
1539*d24d4204SJose E. Roman   }
1540*d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1541*d24d4204SJose E. Roman 
1542*d24d4204SJose E. Roman   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1543*d24d4204SJose E. Roman   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1544*d24d4204SJose E. Roman 
1545*d24d4204SJose E. Roman   stash->svalues         = svalues;
1546*d24d4204SJose E. Roman   stash->sindices        = sindices;
1547*d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1548*d24d4204SJose E. Roman   stash->rindices        = rindices;
1549*d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1550*d24d4204SJose E. Roman   stash->nsends          = nsends;
1551*d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1552*d24d4204SJose E. Roman   stash->reproduce_count = 0;
1553*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1554*d24d4204SJose E. Roman }
1555*d24d4204SJose E. Roman 
1556*d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1557*d24d4204SJose E. Roman {
1558*d24d4204SJose E. Roman   PetscErrorCode ierr;
1559*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1560*d24d4204SJose E. Roman 
1561*d24d4204SJose E. Roman   PetscFunctionBegin;
1562*d24d4204SJose E. Roman   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1563*d24d4204SJose E. Roman   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1564*d24d4204SJose E. Roman   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1565*d24d4204SJose E. Roman   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1566*d24d4204SJose E. Roman   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1567*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1568*d24d4204SJose E. Roman }
1569*d24d4204SJose E. Roman 
1570*d24d4204SJose E. Roman /*@
1571*d24d4204SJose E. Roman    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1572*d24d4204SJose E. Roman    the ScaLAPACK matrix
1573*d24d4204SJose E. Roman 
1574*d24d4204SJose E. Roman    Logically Collective on A
1575*d24d4204SJose E. Roman 
1576*d24d4204SJose E. Roman    Input Parameter:
1577*d24d4204SJose E. Roman +  A  - a MATSCALAPACK matrix
1578*d24d4204SJose E. Roman .  mb - the row block size
1579*d24d4204SJose E. Roman -  nb - the column block size
1580*d24d4204SJose E. Roman 
1581*d24d4204SJose E. Roman    Level: intermediate
1582*d24d4204SJose E. Roman 
1583*d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1584*d24d4204SJose E. Roman @*/
1585*d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1586*d24d4204SJose E. Roman {
1587*d24d4204SJose E. Roman   PetscErrorCode ierr;
1588*d24d4204SJose E. Roman 
1589*d24d4204SJose E. Roman   PetscFunctionBegin;
1590*d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1591*d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,mb,2);
1592*d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,nb,3);
1593*d24d4204SJose E. Roman   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1594*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1595*d24d4204SJose E. Roman }
1596*d24d4204SJose E. Roman 
1597*d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1598*d24d4204SJose E. Roman {
1599*d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1600*d24d4204SJose E. Roman 
1601*d24d4204SJose E. Roman   PetscFunctionBegin;
1602*d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1603*d24d4204SJose E. Roman   if (nb) *nb = a->nb;
1604*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1605*d24d4204SJose E. Roman }
1606*d24d4204SJose E. Roman 
1607*d24d4204SJose E. Roman /*@
1608*d24d4204SJose E. Roman    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1609*d24d4204SJose E. Roman    the ScaLAPACK matrix
1610*d24d4204SJose E. Roman 
1611*d24d4204SJose E. Roman    Not collective
1612*d24d4204SJose E. Roman 
1613*d24d4204SJose E. Roman    Input Parameter:
1614*d24d4204SJose E. Roman .  A  - a MATSCALAPACK matrix
1615*d24d4204SJose E. Roman 
1616*d24d4204SJose E. Roman    Output Parameters:
1617*d24d4204SJose E. Roman +  mb - the row block size
1618*d24d4204SJose E. Roman -  nb - the column block size
1619*d24d4204SJose E. Roman 
1620*d24d4204SJose E. Roman    Level: intermediate
1621*d24d4204SJose E. Roman 
1622*d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1623*d24d4204SJose E. Roman @*/
1624*d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1625*d24d4204SJose E. Roman {
1626*d24d4204SJose E. Roman   PetscErrorCode ierr;
1627*d24d4204SJose E. Roman 
1628*d24d4204SJose E. Roman   PetscFunctionBegin;
1629*d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1630*d24d4204SJose E. Roman   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1631*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1632*d24d4204SJose E. Roman }
1633*d24d4204SJose E. Roman 
1634*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1635*d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1636*d24d4204SJose E. Roman 
1637*d24d4204SJose E. Roman /*MC
1638*d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1639*d24d4204SJose E. Roman 
1640*d24d4204SJose E. Roman    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1641*d24d4204SJose E. Roman 
1642*d24d4204SJose E. Roman    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1643*d24d4204SJose E. Roman 
1644*d24d4204SJose E. Roman    Options Database Keys:
1645*d24d4204SJose E. Roman +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1646*d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1647*d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1648*d24d4204SJose E. Roman 
1649*d24d4204SJose E. Roman    Level: beginner
1650*d24d4204SJose E. Roman 
1651*d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL
1652*d24d4204SJose E. Roman M*/
1653*d24d4204SJose E. Roman 
1654*d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1655*d24d4204SJose E. Roman {
1656*d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1657*d24d4204SJose E. Roman   PetscErrorCode     ierr;
1658*d24d4204SJose E. Roman   PetscBool          flg,flg1;
1659*d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1660*d24d4204SJose E. Roman   MPI_Comm           icomm;
1661*d24d4204SJose E. Roman   PetscBLASInt       nprow,npcol,myrow,mycol;
1662*d24d4204SJose E. Roman   PetscInt           optv1,k=2,array[2]={0,0};
1663*d24d4204SJose E. Roman   PetscMPIInt        size;
1664*d24d4204SJose E. Roman 
1665*d24d4204SJose E. Roman   PetscFunctionBegin;
1666*d24d4204SJose E. Roman   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1667*d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1668*d24d4204SJose E. Roman 
1669*d24d4204SJose E. Roman   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1670*d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1671*d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1672*d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1673*d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1674*d24d4204SJose E. Roman 
1675*d24d4204SJose E. Roman   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1676*d24d4204SJose E. Roman   A->data = (void*)a;
1677*d24d4204SJose E. Roman 
1678*d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1679*d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1680*d24d4204SJose E. Roman     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1681*d24d4204SJose E. Roman   }
1682*d24d4204SJose E. Roman   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1683*d24d4204SJose E. Roman   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1684*d24d4204SJose E. Roman   if (!flg) {
1685*d24d4204SJose E. Roman     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1686*d24d4204SJose E. Roman 
1687*d24d4204SJose E. Roman     ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1688*d24d4204SJose E. Roman     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1689*d24d4204SJose E. Roman 
1690*d24d4204SJose E. Roman     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1691*d24d4204SJose E. Roman     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1692*d24d4204SJose E. Roman     if (flg1) {
1693*d24d4204SJose E. Roman       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1694*d24d4204SJose E. Roman       grid->nprow = optv1;
1695*d24d4204SJose E. Roman     }
1696*d24d4204SJose E. Roman     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1697*d24d4204SJose E. Roman 
1698*d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1699*d24d4204SJose E. Roman     grid->npcol = size/grid->nprow;
1700*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1701*d24d4204SJose E. Roman     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1702*d24d4204SJose E. Roman     Cblacs_get(-1,0,&grid->ictxt);
1703*d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1704*d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1705*d24d4204SJose E. Roman     grid->grid_refct = 1;
1706*d24d4204SJose E. Roman     grid->nprow      = nprow;
1707*d24d4204SJose E. Roman     grid->npcol      = npcol;
1708*d24d4204SJose E. Roman     grid->myrow      = myrow;
1709*d24d4204SJose E. Roman     grid->mycol      = mycol;
1710*d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1711*d24d4204SJose E. Roman     Cblacs_get(-1,0,&grid->ictxrow);
1712*d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1713*d24d4204SJose E. Roman     Cblacs_get(-1,0,&grid->ictxcol);
1714*d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1715*d24d4204SJose E. Roman     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1716*d24d4204SJose E. Roman 
1717*d24d4204SJose E. Roman   } else grid->grid_refct++;
1718*d24d4204SJose E. Roman   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1719*d24d4204SJose E. Roman   a->grid = grid;
1720*d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1721*d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1722*d24d4204SJose E. Roman 
1723*d24d4204SJose E. Roman   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1724*d24d4204SJose E. Roman   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1725*d24d4204SJose E. Roman   if (flg) {
1726*d24d4204SJose E. Roman     a->mb = array[0];
1727*d24d4204SJose E. Roman     a->nb = (k>1)? array[1]: a->mb;
1728*d24d4204SJose E. Roman   }
1729*d24d4204SJose E. Roman   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1730*d24d4204SJose E. Roman 
1731*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1732*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1733*d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1734*d24d4204SJose E. Roman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1735*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1736*d24d4204SJose E. Roman }
1737*d24d4204SJose E. Roman 
1738*d24d4204SJose E. Roman /*@C
1739*d24d4204SJose E. Roman    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1740*d24d4204SJose E. Roman    (2D block cyclic distribution).
1741*d24d4204SJose E. Roman 
1742*d24d4204SJose E. Roman    Collective
1743*d24d4204SJose E. Roman 
1744*d24d4204SJose E. Roman    Input Parameters:
1745*d24d4204SJose E. Roman +  comm - MPI communicator
1746*d24d4204SJose E. Roman .  mb   - row block size (or PETSC_DECIDE to have it set)
1747*d24d4204SJose E. Roman .  nb   - column block size (or PETSC_DECIDE to have it set)
1748*d24d4204SJose E. Roman .  M    - number of global rows
1749*d24d4204SJose E. Roman .  N    - number of global columns
1750*d24d4204SJose E. Roman .  rsrc - coordinate of process that owns the first row of the distributed matrix
1751*d24d4204SJose E. Roman -  csrc - coordinate of process that owns the first column of the distributed matrix
1752*d24d4204SJose E. Roman 
1753*d24d4204SJose E. Roman    Output Parameter:
1754*d24d4204SJose E. Roman .  A - the matrix
1755*d24d4204SJose E. Roman 
1756*d24d4204SJose E. Roman    Options Database Keys:
1757*d24d4204SJose E. Roman .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1758*d24d4204SJose E. Roman 
1759*d24d4204SJose E. Roman    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1760*d24d4204SJose E. Roman    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1761*d24d4204SJose E. Roman    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1762*d24d4204SJose E. Roman 
1763*d24d4204SJose E. Roman    Notes:
1764*d24d4204SJose E. Roman    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1765*d24d4204SJose E. Roman    is chosen.
1766*d24d4204SJose E. Roman 
1767*d24d4204SJose E. Roman    Storage Information:
1768*d24d4204SJose E. Roman    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1769*d24d4204SJose E. Roman    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1770*d24d4204SJose E. Roman    significance and are thus ignored. The block sizes refer to the values
1771*d24d4204SJose E. Roman    used for the distributed matrix, not the same meaning as in BAIJ.
1772*d24d4204SJose E. Roman 
1773*d24d4204SJose E. Roman    Level: intermediate
1774*d24d4204SJose E. Roman 
1775*d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1776*d24d4204SJose E. Roman @*/
1777*d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1778*d24d4204SJose E. Roman {
1779*d24d4204SJose E. Roman   PetscErrorCode ierr;
1780*d24d4204SJose E. Roman   Mat_ScaLAPACK  *a;
1781*d24d4204SJose E. Roman   PetscInt       m,n;
1782*d24d4204SJose E. Roman 
1783*d24d4204SJose E. Roman   PetscFunctionBegin;
1784*d24d4204SJose E. Roman   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1785*d24d4204SJose E. Roman   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1786*d24d4204SJose E. Roman   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1787*d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1788*d24d4204SJose E. Roman   m = PETSC_DECIDE;
1789*d24d4204SJose E. Roman   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1790*d24d4204SJose E. Roman   n = PETSC_DECIDE;
1791*d24d4204SJose E. Roman   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1792*d24d4204SJose E. Roman   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1793*d24d4204SJose E. Roman   a = (Mat_ScaLAPACK*)(*A)->data;
1794*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1795*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1796*d24d4204SJose E. Roman   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1797*d24d4204SJose E. Roman   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1798*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1799*d24d4204SJose E. Roman   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1800*d24d4204SJose E. Roman   ierr = MatSetUp(*A);CHKERRQ(ierr);
1801*d24d4204SJose E. Roman   PetscFunctionReturn(0);
1802*d24d4204SJose E. Roman }
1803*d24d4204SJose E. Roman 
1804