xref: /petsc/src/mat/impls/scalapack/matscalapack.c (revision f7ec113fd12c8646f7554ffc9804b9c500e5de84)
1d24d4204SJose E. Roman #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2d24d4204SJose E. Roman 
3d24d4204SJose E. Roman #define DEFAULT_BLOCKSIZE 64
4d24d4204SJose E. Roman 
5d24d4204SJose E. Roman /*
6d24d4204SJose E. Roman     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7d24d4204SJose E. Roman   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8d24d4204SJose E. Roman */
9d24d4204SJose E. Roman static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10d24d4204SJose E. Roman 
11*f7ec113fSDamian Marek static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
12*f7ec113fSDamian Marek {
13*f7ec113fSDamian Marek   PetscErrorCode ierr;
14*f7ec113fSDamian Marek 
15*f7ec113fSDamian Marek   PetscFunctionBegin;
16*f7ec113fSDamian Marek   ierr = PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");CHKERRQ(ierr);
17*f7ec113fSDamian Marek   ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
18*f7ec113fSDamian Marek   PetscFunctionReturn(0);
19*f7ec113fSDamian Marek }
20*f7ec113fSDamian Marek 
21d24d4204SJose E. Roman static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
22d24d4204SJose E. Roman {
23d24d4204SJose E. Roman   PetscErrorCode    ierr;
24d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
25d24d4204SJose E. Roman   PetscBool         iascii;
26d24d4204SJose E. Roman   PetscViewerFormat format;
27d24d4204SJose E. Roman   Mat               Adense;
28d24d4204SJose E. Roman 
29d24d4204SJose E. Roman   PetscFunctionBegin;
30d24d4204SJose E. Roman   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
31d24d4204SJose E. Roman   if (iascii) {
32d24d4204SJose E. Roman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
33d24d4204SJose E. Roman     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
34d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
35d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
36d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
37d24d4204SJose E. Roman       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
38d24d4204SJose E. Roman       PetscFunctionReturn(0);
39d24d4204SJose E. Roman     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
40d24d4204SJose E. Roman       PetscFunctionReturn(0);
41d24d4204SJose E. Roman     }
42d24d4204SJose E. Roman   }
43d24d4204SJose E. Roman   /* convert to dense format and call MatView() */
44d24d4204SJose E. Roman   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
45d24d4204SJose E. Roman   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
46d24d4204SJose E. Roman   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
47d24d4204SJose E. Roman   PetscFunctionReturn(0);
48d24d4204SJose E. Roman }
49d24d4204SJose E. Roman 
50d24d4204SJose E. Roman static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
51d24d4204SJose E. Roman {
52d24d4204SJose E. Roman   PetscErrorCode ierr;
53d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
54d24d4204SJose E. Roman   PetscLogDouble isend[2],irecv[2];
55d24d4204SJose E. Roman 
56d24d4204SJose E. Roman   PetscFunctionBegin;
57d24d4204SJose E. Roman   info->block_size = 1.0;
58d24d4204SJose E. Roman 
59d24d4204SJose E. Roman   isend[0] = a->lld*a->locc;     /* locally allocated */
60d24d4204SJose E. Roman   isend[1] = a->locr*a->locc;    /* used submatrix */
61d24d4204SJose E. Roman   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
62d24d4204SJose E. Roman     info->nz_allocated   = isend[0];
63d24d4204SJose E. Roman     info->nz_used        = isend[1];
64d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_MAX) {
65d24d4204SJose E. Roman     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
66d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
67d24d4204SJose E. Roman     info->nz_used        = irecv[1];
68d24d4204SJose E. Roman   } else if (flag == MAT_GLOBAL_SUM) {
69d24d4204SJose E. Roman     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
70d24d4204SJose E. Roman     info->nz_allocated   = irecv[0];
71d24d4204SJose E. Roman     info->nz_used        = irecv[1];
72d24d4204SJose E. Roman   }
73d24d4204SJose E. Roman 
74d24d4204SJose E. Roman   info->nz_unneeded       = 0;
75d24d4204SJose E. Roman   info->assemblies        = A->num_ass;
76d24d4204SJose E. Roman   info->mallocs           = 0;
77d24d4204SJose E. Roman   info->memory            = ((PetscObject)A)->mem;
78d24d4204SJose E. Roman   info->fill_ratio_given  = 0;
79d24d4204SJose E. Roman   info->fill_ratio_needed = 0;
80d24d4204SJose E. Roman   info->factor_mallocs    = 0;
81d24d4204SJose E. Roman   PetscFunctionReturn(0);
82d24d4204SJose E. Roman }
83d24d4204SJose E. Roman 
84d24d4204SJose E. Roman PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
85d24d4204SJose E. Roman {
86d24d4204SJose E. Roman   PetscFunctionBegin;
87d24d4204SJose E. Roman   switch (op) {
88d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATIONS:
89d24d4204SJose E. Roman     case MAT_NEW_NONZERO_LOCATION_ERR:
90d24d4204SJose E. Roman     case MAT_NEW_NONZERO_ALLOCATION_ERR:
91d24d4204SJose E. Roman     case MAT_SYMMETRIC:
92d24d4204SJose E. Roman     case MAT_SORTED_FULL:
93d24d4204SJose E. Roman     case MAT_HERMITIAN:
94d24d4204SJose E. Roman       break;
95d24d4204SJose E. Roman     default:
96d24d4204SJose E. Roman       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
97d24d4204SJose E. Roman   }
98d24d4204SJose E. Roman   PetscFunctionReturn(0);
99d24d4204SJose E. Roman }
100d24d4204SJose E. Roman 
101d24d4204SJose E. Roman static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
102d24d4204SJose E. Roman {
103d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
104d24d4204SJose E. Roman   PetscErrorCode ierr;
105d24d4204SJose E. Roman   PetscInt       i,j;
106d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
107d24d4204SJose E. Roman 
108d24d4204SJose E. Roman   PetscFunctionBegin;
109d24d4204SJose E. Roman   for (i=0;i<nr;i++) {
110d24d4204SJose E. Roman     if (rows[i] < 0) continue;
111d24d4204SJose E. Roman     ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);
112d24d4204SJose E. Roman     for (j=0;j<nc;j++) {
113d24d4204SJose E. Roman       if (cols[j] < 0) continue;
114d24d4204SJose E. Roman       ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
115d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
116d24d4204SJose E. Roman       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
117d24d4204SJose E. Roman         switch (imode) {
118d24d4204SJose E. Roman           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
119d24d4204SJose E. Roman           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
120d24d4204SJose E. Roman           default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
121d24d4204SJose E. Roman         }
122d24d4204SJose E. Roman       } else {
123d24d4204SJose E. Roman         if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
124d24d4204SJose E. Roman         A->assembled = PETSC_FALSE;
125d24d4204SJose E. Roman         ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
126d24d4204SJose E. Roman       }
127d24d4204SJose E. Roman     }
128d24d4204SJose E. Roman   }
129d24d4204SJose E. Roman   PetscFunctionReturn(0);
130d24d4204SJose E. Roman }
131d24d4204SJose E. Roman 
132d24d4204SJose E. Roman static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
133d24d4204SJose E. Roman {
134d24d4204SJose E. Roman   PetscErrorCode ierr;
135d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
136d24d4204SJose E. Roman   PetscScalar    *x2d,*y2d,alpha=1.0;
137d24d4204SJose E. Roman   const PetscInt *ranges;
138d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
139d24d4204SJose E. Roman 
140d24d4204SJose E. Roman   PetscFunctionBegin;
141d24d4204SJose E. Roman   if (transpose) {
142d24d4204SJose E. Roman 
143d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
144d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
145d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
146d24d4204SJose E. Roman     xlld = PetscMax(1,A->rmap->n);
147d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
148d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
149d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
150d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
151d24d4204SJose E. Roman     ylld = 1;
152d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
153d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
154d24d4204SJose E. Roman 
155d24d4204SJose E. Roman     /* allocate 2d vectors */
156d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
157d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
158d24d4204SJose E. Roman     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
159d24d4204SJose E. Roman     xlld = PetscMax(1,lszx);
160d24d4204SJose E. Roman 
161d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
162d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
163d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
164d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
165d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
166d24d4204SJose E. Roman 
167d24d4204SJose E. Roman     /* redistribute x as a column of a 2d matrix */
168d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
169d24d4204SJose E. Roman 
170d24d4204SJose E. Roman     /* redistribute y as a row of a 2d matrix */
171d24d4204SJose E. Roman     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
172d24d4204SJose E. Roman 
173d24d4204SJose E. Roman     /* call PBLAS subroutine */
174d24d4204SJose E. Roman     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
175d24d4204SJose E. Roman 
176d24d4204SJose E. Roman     /* redistribute y from a row of a 2d matrix */
177d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
178d24d4204SJose E. Roman 
179d24d4204SJose E. Roman   } else {   /* non-transpose */
180d24d4204SJose E. Roman 
181d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
182d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
183d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
184d24d4204SJose E. Roman     xlld = 1;
185d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
186d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
187d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
188d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
189d24d4204SJose E. Roman     ylld = PetscMax(1,A->rmap->n);
190d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
191d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
192d24d4204SJose E. Roman 
193d24d4204SJose E. Roman     /* allocate 2d vectors */
194d24d4204SJose E. Roman     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
195d24d4204SJose E. Roman     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
196d24d4204SJose E. Roman     ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
197d24d4204SJose E. Roman     ylld = PetscMax(1,lszy);
198d24d4204SJose E. Roman 
199d24d4204SJose E. Roman     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
200d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
201d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
202d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
203d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
204d24d4204SJose E. Roman 
205d24d4204SJose E. Roman     /* redistribute x as a row of a 2d matrix */
206d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
207d24d4204SJose E. Roman 
208d24d4204SJose E. Roman     /* redistribute y as a column of a 2d matrix */
209d24d4204SJose E. Roman     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
210d24d4204SJose E. Roman 
211d24d4204SJose E. Roman     /* call PBLAS subroutine */
212d24d4204SJose E. Roman     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
213d24d4204SJose E. Roman 
214d24d4204SJose E. Roman     /* redistribute y from a column of a 2d matrix */
215d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
216d24d4204SJose E. Roman 
217d24d4204SJose E. Roman   }
218d24d4204SJose E. Roman   ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
219d24d4204SJose E. Roman   PetscFunctionReturn(0);
220d24d4204SJose E. Roman }
221d24d4204SJose E. Roman 
222d24d4204SJose E. Roman static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
223d24d4204SJose E. Roman {
224d24d4204SJose E. Roman   PetscErrorCode    ierr;
225d24d4204SJose E. Roman   const PetscScalar *xarray;
226d24d4204SJose E. Roman   PetscScalar       *yarray;
227d24d4204SJose E. Roman 
228d24d4204SJose E. Roman   PetscFunctionBegin;
229d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
230d24d4204SJose E. Roman   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
231d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
232d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
233d24d4204SJose E. Roman   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
234d24d4204SJose E. Roman   PetscFunctionReturn(0);
235d24d4204SJose E. Roman }
236d24d4204SJose E. Roman 
237d24d4204SJose E. Roman static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
238d24d4204SJose E. Roman {
239d24d4204SJose E. Roman   PetscErrorCode    ierr;
240d24d4204SJose E. Roman   const PetscScalar *xarray;
241d24d4204SJose E. Roman   PetscScalar       *yarray;
242d24d4204SJose E. Roman 
243d24d4204SJose E. Roman   PetscFunctionBegin;
244d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
245d24d4204SJose E. Roman   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
246d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
247d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
248d24d4204SJose E. Roman   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
249d24d4204SJose E. Roman   PetscFunctionReturn(0);
250d24d4204SJose E. Roman }
251d24d4204SJose E. Roman 
252d24d4204SJose E. Roman static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
253d24d4204SJose E. Roman {
254d24d4204SJose E. Roman   PetscErrorCode    ierr;
255d24d4204SJose E. Roman   const PetscScalar *xarray;
256d24d4204SJose E. Roman   PetscScalar       *zarray;
257d24d4204SJose E. Roman 
258d24d4204SJose E. Roman   PetscFunctionBegin;
259d24d4204SJose E. Roman   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
260d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
261d24d4204SJose E. Roman   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
262d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
263d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
264d24d4204SJose E. Roman   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
265d24d4204SJose E. Roman   PetscFunctionReturn(0);
266d24d4204SJose E. Roman }
267d24d4204SJose E. Roman 
268d24d4204SJose E. Roman static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
269d24d4204SJose E. Roman {
270d24d4204SJose E. Roman   PetscErrorCode    ierr;
271d24d4204SJose E. Roman   const PetscScalar *xarray;
272d24d4204SJose E. Roman   PetscScalar       *zarray;
273d24d4204SJose E. Roman 
274d24d4204SJose E. Roman   PetscFunctionBegin;
275d24d4204SJose E. Roman   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
276d24d4204SJose E. Roman   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
277d24d4204SJose E. Roman   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
278d24d4204SJose E. Roman   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
279d24d4204SJose E. Roman   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
280d24d4204SJose E. Roman   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
281d24d4204SJose E. Roman   PetscFunctionReturn(0);
282d24d4204SJose E. Roman }
283d24d4204SJose E. Roman 
284d24d4204SJose E. Roman PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
285d24d4204SJose E. Roman {
286d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
287d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
288d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
289d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
290d24d4204SJose E. Roman   PetscBLASInt  one=1;
291d24d4204SJose E. Roman 
292d24d4204SJose E. Roman   PetscFunctionBegin;
293d24d4204SJose E. Roman   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
294d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
295d24d4204SJose E. Roman   PetscFunctionReturn(0);
296d24d4204SJose E. Roman }
297d24d4204SJose E. Roman 
298d24d4204SJose E. Roman PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
299d24d4204SJose E. Roman {
300d24d4204SJose E. Roman   PetscErrorCode ierr;
301d24d4204SJose E. Roman 
302d24d4204SJose E. Roman   PetscFunctionBegin;
303d24d4204SJose E. Roman   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
304d24d4204SJose E. Roman   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
305d24d4204SJose E. Roman   ierr = MatSetUp(C);CHKERRQ(ierr);
306d24d4204SJose E. Roman   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
307d24d4204SJose E. Roman   PetscFunctionReturn(0);
308d24d4204SJose E. Roman }
309d24d4204SJose E. Roman 
310d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
311d24d4204SJose E. Roman {
312d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
313d24d4204SJose E. Roman   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
314d24d4204SJose E. Roman   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
315d24d4204SJose E. Roman   PetscScalar   sone=1.0,zero=0.0;
316d24d4204SJose E. Roman   PetscBLASInt  one=1;
317d24d4204SJose E. Roman 
318d24d4204SJose E. Roman   PetscFunctionBegin;
319d24d4204SJose E. Roman   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
320d24d4204SJose E. Roman   C->assembled = PETSC_TRUE;
321d24d4204SJose E. Roman   PetscFunctionReturn(0);
322d24d4204SJose E. Roman }
323d24d4204SJose E. Roman 
324d24d4204SJose E. Roman static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
325d24d4204SJose E. Roman {
326d24d4204SJose E. Roman   PetscErrorCode ierr;
327d24d4204SJose E. Roman 
328d24d4204SJose E. Roman   PetscFunctionBegin;
329d24d4204SJose E. Roman   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
330d24d4204SJose E. Roman   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
331d24d4204SJose E. Roman   ierr = MatSetUp(C);CHKERRQ(ierr);
332d24d4204SJose E. Roman   PetscFunctionReturn(0);
333d24d4204SJose E. Roman }
334d24d4204SJose E. Roman 
335d24d4204SJose E. Roman /* --------------------------------------- */
336d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
337d24d4204SJose E. Roman {
338d24d4204SJose E. Roman   PetscFunctionBegin;
339d24d4204SJose E. Roman   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
340d24d4204SJose E. Roman   C->ops->productsymbolic = MatProductSymbolic_AB;
341d24d4204SJose E. Roman   PetscFunctionReturn(0);
342d24d4204SJose E. Roman }
343d24d4204SJose E. Roman 
344d24d4204SJose E. Roman static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
345d24d4204SJose E. Roman {
346d24d4204SJose E. Roman   PetscFunctionBegin;
347d24d4204SJose E. Roman   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
348d24d4204SJose E. Roman   C->ops->productsymbolic          = MatProductSymbolic_ABt;
349d24d4204SJose E. Roman   PetscFunctionReturn(0);
350d24d4204SJose E. Roman }
351d24d4204SJose E. Roman 
352d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
353d24d4204SJose E. Roman {
354d24d4204SJose E. Roman   PetscErrorCode ierr;
355d24d4204SJose E. Roman   Mat_Product    *product = C->product;
356d24d4204SJose E. Roman 
357d24d4204SJose E. Roman   PetscFunctionBegin;
358d24d4204SJose E. Roman   switch (product->type) {
359d24d4204SJose E. Roman     case MATPRODUCT_AB:
360d24d4204SJose E. Roman       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
361d24d4204SJose E. Roman       break;
362d24d4204SJose E. Roman     case MATPRODUCT_ABt:
363d24d4204SJose E. Roman       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
364d24d4204SJose E. Roman       break;
365d24d4204SJose E. Roman     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
366d24d4204SJose E. Roman   }
367d24d4204SJose E. Roman   PetscFunctionReturn(0);
368d24d4204SJose E. Roman }
369d24d4204SJose E. Roman /* --------------------------------------- */
370d24d4204SJose E. Roman 
371d24d4204SJose E. Roman static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
372d24d4204SJose E. Roman {
373d24d4204SJose E. Roman   PetscErrorCode    ierr;
374d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
375d24d4204SJose E. Roman   PetscScalar       *darray,*d2d,v;
376d24d4204SJose E. Roman   const PetscInt    *ranges;
377d24d4204SJose E. Roman   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
378d24d4204SJose E. Roman 
379d24d4204SJose E. Roman   PetscFunctionBegin;
380d24d4204SJose E. Roman   ierr = VecGetArray(D,&darray);CHKERRQ(ierr);
381d24d4204SJose E. Roman 
382d24d4204SJose E. Roman   if (A->rmap->N<=A->cmap->N) {   /* row version */
383d24d4204SJose E. Roman 
384d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
385d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
386d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
387d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
388d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
389d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
390d24d4204SJose E. Roman 
391d24d4204SJose E. Roman     /* allocate 2d vector */
392d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
393d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
394d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
395d24d4204SJose E. Roman 
396d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
397d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
398d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
399d24d4204SJose E. Roman 
400d24d4204SJose E. Roman     /* collect diagonal */
401d24d4204SJose E. Roman     for (j=1;j<=a->M;j++) {
402d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
403d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
404d24d4204SJose E. Roman     }
405d24d4204SJose E. Roman 
406d24d4204SJose E. Roman     /* redistribute d from a column of a 2d matrix */
407d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
408d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
409d24d4204SJose E. Roman 
410d24d4204SJose E. Roman   } else {   /* column version */
411d24d4204SJose E. Roman 
412d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
413d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
414d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
415d24d4204SJose E. Roman     dlld = 1;
416d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
417d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
418d24d4204SJose E. Roman 
419d24d4204SJose E. Roman     /* allocate 2d vector */
420d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
421d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
422d24d4204SJose E. Roman 
423d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
424d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
425d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
426d24d4204SJose E. Roman 
427d24d4204SJose E. Roman     /* collect diagonal */
428d24d4204SJose E. Roman     for (j=1;j<=a->N;j++) {
429d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
430d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
431d24d4204SJose E. Roman     }
432d24d4204SJose E. Roman 
433d24d4204SJose E. Roman     /* redistribute d from a row of a 2d matrix */
434d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
435d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
436d24d4204SJose E. Roman   }
437d24d4204SJose E. Roman 
438d24d4204SJose E. Roman   ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
439d24d4204SJose E. Roman   ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
440d24d4204SJose E. Roman   ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
441d24d4204SJose E. Roman   PetscFunctionReturn(0);
442d24d4204SJose E. Roman }
443d24d4204SJose E. Roman 
444d24d4204SJose E. Roman static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
445d24d4204SJose E. Roman {
446d24d4204SJose E. Roman   PetscErrorCode    ierr;
447d24d4204SJose E. Roman   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
448d24d4204SJose E. Roman   const PetscScalar *d;
449d24d4204SJose E. Roman   const PetscInt    *ranges;
450d24d4204SJose E. Roman   PetscScalar       *d2d;
451d24d4204SJose E. Roman   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
452d24d4204SJose E. Roman 
453d24d4204SJose E. Roman   PetscFunctionBegin;
454d24d4204SJose E. Roman   if (R) {
455d24d4204SJose E. Roman     ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
456d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
457d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
458d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
459d24d4204SJose E. Roman     dlld = 1;
460d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
461d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
462d24d4204SJose E. Roman 
463d24d4204SJose E. Roman     /* allocate 2d vector */
464d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
465d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
466d24d4204SJose E. Roman 
467d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
468d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
469d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
470d24d4204SJose E. Roman 
471d24d4204SJose E. Roman     /* redistribute d to a row of a 2d matrix */
472d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
473d24d4204SJose E. Roman 
474d24d4204SJose E. Roman     /* broadcast along process columns */
475d24d4204SJose E. Roman     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
476d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
477d24d4204SJose E. Roman 
478d24d4204SJose E. Roman     /* local scaling */
479d24d4204SJose E. Roman     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
480d24d4204SJose E. Roman 
481d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
482d24d4204SJose E. Roman     ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
483d24d4204SJose E. Roman   }
484d24d4204SJose E. Roman   if (L) {
485d24d4204SJose E. Roman     ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
486d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (1d block distribution) */
487d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
488d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
489d24d4204SJose E. Roman     dlld = PetscMax(1,A->rmap->n);
490d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
491d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
492d24d4204SJose E. Roman 
493d24d4204SJose E. Roman     /* allocate 2d vector */
494d24d4204SJose E. Roman     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
495d24d4204SJose E. Roman     ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
496d24d4204SJose E. Roman     dlld = PetscMax(1,lszd);
497d24d4204SJose E. Roman 
498d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for vector (2d block distribution) */
499d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
500d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
501d24d4204SJose E. Roman 
502d24d4204SJose E. Roman     /* redistribute d to a column of a 2d matrix */
503d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
504d24d4204SJose E. Roman 
505d24d4204SJose E. Roman     /* broadcast along process rows */
506d24d4204SJose E. Roman     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
507d24d4204SJose E. Roman     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
508d24d4204SJose E. Roman 
509d24d4204SJose E. Roman     /* local scaling */
510d24d4204SJose E. Roman     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
511d24d4204SJose E. Roman 
512d24d4204SJose E. Roman     ierr = PetscFree(d2d);CHKERRQ(ierr);
513d24d4204SJose E. Roman     ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
514d24d4204SJose E. Roman   }
515d24d4204SJose E. Roman   PetscFunctionReturn(0);
516d24d4204SJose E. Roman }
517d24d4204SJose E. Roman 
518d24d4204SJose E. Roman static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
519d24d4204SJose E. Roman {
520d24d4204SJose E. Roman   PetscFunctionBegin;
521d24d4204SJose E. Roman   *missing = PETSC_FALSE;
522d24d4204SJose E. Roman   PetscFunctionReturn(0);
523d24d4204SJose E. Roman }
524d24d4204SJose E. Roman 
525d24d4204SJose E. Roman static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
526d24d4204SJose E. Roman {
527d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
528d24d4204SJose E. Roman   PetscBLASInt  n,one=1;
529d24d4204SJose E. Roman 
530d24d4204SJose E. Roman   PetscFunctionBegin;
531d24d4204SJose E. Roman   n = x->lld*x->locc;
532d24d4204SJose E. Roman   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
533d24d4204SJose E. Roman   PetscFunctionReturn(0);
534d24d4204SJose E. Roman }
535d24d4204SJose E. Roman 
536d24d4204SJose E. Roman static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
537d24d4204SJose E. Roman {
538d24d4204SJose E. Roman   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
539d24d4204SJose E. Roman   PetscBLASInt  i,n;
540d24d4204SJose E. Roman   PetscScalar   v;
541d24d4204SJose E. Roman 
542d24d4204SJose E. Roman   PetscFunctionBegin;
543d24d4204SJose E. Roman   n = PetscMin(x->M,x->N);
544d24d4204SJose E. Roman   for (i=1;i<=n;i++) {
545d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
546d24d4204SJose E. Roman     v += alpha;
547d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
548d24d4204SJose E. Roman   }
549d24d4204SJose E. Roman   PetscFunctionReturn(0);
550d24d4204SJose E. Roman }
551d24d4204SJose E. Roman 
552d24d4204SJose E. Roman static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
553d24d4204SJose E. Roman {
554d24d4204SJose E. Roman   PetscErrorCode ierr;
555d24d4204SJose E. Roman   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
556d24d4204SJose E. Roman   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
557d24d4204SJose E. Roman   PetscBLASInt   one=1;
558d24d4204SJose E. Roman   PetscScalar    beta=1.0;
559d24d4204SJose E. Roman 
560d24d4204SJose E. Roman   PetscFunctionBegin;
561d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(Y,1,X,3);
562d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
563d24d4204SJose E. Roman   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
564d24d4204SJose E. Roman   PetscFunctionReturn(0);
565d24d4204SJose E. Roman }
566d24d4204SJose E. Roman 
567d24d4204SJose E. Roman static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
568d24d4204SJose E. Roman {
569d24d4204SJose E. Roman   PetscErrorCode ierr;
570d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
571d24d4204SJose E. Roman   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
572d24d4204SJose E. Roman 
573d24d4204SJose E. Roman   PetscFunctionBegin;
574d24d4204SJose E. Roman   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
575d24d4204SJose E. Roman   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
576d24d4204SJose E. Roman   PetscFunctionReturn(0);
577d24d4204SJose E. Roman }
578d24d4204SJose E. Roman 
579d24d4204SJose E. Roman static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
580d24d4204SJose E. Roman {
581d24d4204SJose E. Roman   Mat            Bs;
582d24d4204SJose E. Roman   MPI_Comm       comm;
583d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
584d24d4204SJose E. Roman   PetscErrorCode ierr;
585d24d4204SJose E. Roman 
586d24d4204SJose E. Roman   PetscFunctionBegin;
587d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
588d24d4204SJose E. Roman   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
589d24d4204SJose E. Roman   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
590d24d4204SJose E. Roman   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
591d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
592d24d4204SJose E. Roman   b->M    = a->M;
593d24d4204SJose E. Roman   b->N    = a->N;
594d24d4204SJose E. Roman   b->mb   = a->mb;
595d24d4204SJose E. Roman   b->nb   = a->nb;
596d24d4204SJose E. Roman   b->rsrc = a->rsrc;
597d24d4204SJose E. Roman   b->csrc = a->csrc;
598d24d4204SJose E. Roman   ierr = MatSetUp(Bs);CHKERRQ(ierr);
599d24d4204SJose E. Roman   *B = Bs;
600d24d4204SJose E. Roman   if (op == MAT_COPY_VALUES) {
601d24d4204SJose E. Roman     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
602d24d4204SJose E. Roman   }
603d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
604d24d4204SJose E. Roman   PetscFunctionReturn(0);
605d24d4204SJose E. Roman }
606d24d4204SJose E. Roman 
607d24d4204SJose E. Roman static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
608d24d4204SJose E. Roman {
609d24d4204SJose E. Roman   PetscErrorCode ierr;
610d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
611d24d4204SJose E. Roman   Mat            Bs = *B;
612d24d4204SJose E. Roman   PetscBLASInt   one=1;
613d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
614d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
615d24d4204SJose E. Roman   PetscInt       i,j;
616d24d4204SJose E. Roman #endif
617d24d4204SJose E. Roman 
618d24d4204SJose E. Roman   PetscFunctionBegin;
619d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
620d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
621d24d4204SJose E. Roman     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
622d24d4204SJose E. Roman     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
623d24d4204SJose E. Roman     ierr = MatSetUp(Bs);CHKERRQ(ierr);
624d24d4204SJose E. Roman     *B = Bs;
625d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
626d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
627d24d4204SJose E. Roman   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
628d24d4204SJose E. Roman #if defined(PETSC_USE_COMPLEX)
629d24d4204SJose E. Roman   /* undo conjugation */
630d24d4204SJose E. Roman   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
631d24d4204SJose E. Roman #endif
632d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
633d24d4204SJose E. Roman   PetscFunctionReturn(0);
634d24d4204SJose E. Roman }
635d24d4204SJose E. Roman 
636d24d4204SJose E. Roman static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
637d24d4204SJose E. Roman {
638d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
639d24d4204SJose E. Roman   PetscInt      i,j;
640d24d4204SJose E. Roman 
641d24d4204SJose E. Roman   PetscFunctionBegin;
642d24d4204SJose E. Roman   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
643d24d4204SJose E. Roman   PetscFunctionReturn(0);
644d24d4204SJose E. Roman }
645d24d4204SJose E. Roman 
646d24d4204SJose E. Roman static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
647d24d4204SJose E. Roman {
648d24d4204SJose E. Roman   PetscErrorCode ierr;
649d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
650d24d4204SJose E. Roman   Mat            Bs = *B;
651d24d4204SJose E. Roman   PetscBLASInt   one=1;
652d24d4204SJose E. Roman   PetscScalar    sone=1.0,zero=0.0;
653d24d4204SJose E. Roman 
654d24d4204SJose E. Roman   PetscFunctionBegin;
655d24d4204SJose E. Roman   if (reuse == MAT_INITIAL_MATRIX) {
656d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
657d24d4204SJose E. Roman     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
658d24d4204SJose E. Roman     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
659d24d4204SJose E. Roman     ierr = MatSetUp(Bs);CHKERRQ(ierr);
660d24d4204SJose E. Roman     *B = Bs;
661d24d4204SJose E. Roman   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
662d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bs->data;
663d24d4204SJose E. Roman   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
664d24d4204SJose E. Roman   Bs->assembled = PETSC_TRUE;
665d24d4204SJose E. Roman   PetscFunctionReturn(0);
666d24d4204SJose E. Roman }
667d24d4204SJose E. Roman 
668d24d4204SJose E. Roman static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
669d24d4204SJose E. Roman {
670d24d4204SJose E. Roman   PetscErrorCode ierr;
671d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
672d24d4204SJose E. Roman   PetscScalar    *x,*x2d;
673d24d4204SJose E. Roman   const PetscInt *ranges;
674d24d4204SJose E. Roman   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
675d24d4204SJose E. Roman 
676d24d4204SJose E. Roman   PetscFunctionBegin;
677d24d4204SJose E. Roman   ierr = VecCopy(B,X);CHKERRQ(ierr);
678d24d4204SJose E. Roman   ierr = VecGetArray(X,&x);CHKERRQ(ierr);
679d24d4204SJose E. Roman 
680d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
681d24d4204SJose E. Roman   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
682d24d4204SJose E. Roman   ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
683d24d4204SJose E. Roman   xlld = PetscMax(1,A->rmap->n);
684d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
685d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
686d24d4204SJose E. Roman 
687d24d4204SJose E. Roman   /* allocate 2d vector */
688d24d4204SJose E. Roman   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
689d24d4204SJose E. Roman   ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
690d24d4204SJose E. Roman   xlld = PetscMax(1,lszx);
691d24d4204SJose E. Roman 
692d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
693d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
694d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
695d24d4204SJose E. Roman 
696d24d4204SJose E. Roman   /* redistribute x as a column of a 2d matrix */
697d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
698d24d4204SJose E. Roman 
699d24d4204SJose E. Roman   /* call ScaLAPACK subroutine */
700d24d4204SJose E. Roman   switch (A->factortype) {
701d24d4204SJose E. Roman     case MAT_FACTOR_LU:
702d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
703d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
704d24d4204SJose E. Roman       break;
705d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
706d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
707d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
708d24d4204SJose E. Roman       break;
709d24d4204SJose E. Roman     default:
710d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
711d24d4204SJose E. Roman       break;
712d24d4204SJose E. Roman   }
713d24d4204SJose E. Roman 
714d24d4204SJose E. Roman   /* redistribute x from a column of a 2d matrix */
715d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
716d24d4204SJose E. Roman 
717d24d4204SJose E. Roman   ierr = PetscFree(x2d);CHKERRQ(ierr);
718d24d4204SJose E. Roman   ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
719d24d4204SJose E. Roman   PetscFunctionReturn(0);
720d24d4204SJose E. Roman }
721d24d4204SJose E. Roman 
722d24d4204SJose E. Roman static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
723d24d4204SJose E. Roman {
724d24d4204SJose E. Roman   PetscErrorCode ierr;
725d24d4204SJose E. Roman 
726d24d4204SJose E. Roman   PetscFunctionBegin;
727d24d4204SJose E. Roman   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
728d24d4204SJose E. Roman   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
729d24d4204SJose E. Roman   PetscFunctionReturn(0);
730d24d4204SJose E. Roman }
731d24d4204SJose E. Roman 
732d24d4204SJose E. Roman static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
733d24d4204SJose E. Roman {
734d24d4204SJose E. Roman   PetscErrorCode ierr;
735d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
736d24d4204SJose E. Roman   PetscBool      flg1,flg2;
737d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
738d24d4204SJose E. Roman 
739d24d4204SJose E. Roman   PetscFunctionBegin;
740d24d4204SJose E. Roman   ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
741d24d4204SJose E. Roman   ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
742d24d4204SJose E. Roman   if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
743d24d4204SJose E. Roman   MatScaLAPACKCheckDistribution(B,1,X,2);
744d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)B->data;
745d24d4204SJose E. Roman   x = (Mat_ScaLAPACK*)X->data;
746d24d4204SJose E. Roman   ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);
747d24d4204SJose E. Roman 
748d24d4204SJose E. Roman   switch (A->factortype) {
749d24d4204SJose E. Roman     case MAT_FACTOR_LU:
750d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
751d24d4204SJose E. Roman       PetscCheckScaLapackInfo("getrs",info);
752d24d4204SJose E. Roman       break;
753d24d4204SJose E. Roman     case MAT_FACTOR_CHOLESKY:
754d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
755d24d4204SJose E. Roman       PetscCheckScaLapackInfo("potrs",info);
756d24d4204SJose E. Roman       break;
757d24d4204SJose E. Roman     default:
758d24d4204SJose E. Roman       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
759d24d4204SJose E. Roman       break;
760d24d4204SJose E. Roman   }
761d24d4204SJose E. Roman   PetscFunctionReturn(0);
762d24d4204SJose E. Roman }
763d24d4204SJose E. Roman 
764d24d4204SJose E. Roman static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
765d24d4204SJose E. Roman {
766d24d4204SJose E. Roman   PetscErrorCode ierr;
767d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
768d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
769d24d4204SJose E. Roman 
770d24d4204SJose E. Roman   PetscFunctionBegin;
771d24d4204SJose E. Roman   if (!a->pivots) {
772d24d4204SJose E. Roman     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
773d24d4204SJose E. Roman     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
774d24d4204SJose E. Roman   }
775d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
776d24d4204SJose E. Roman   PetscCheckScaLapackInfo("getrf",info);
777d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_LU;
778d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
779d24d4204SJose E. Roman 
780d24d4204SJose E. Roman   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
781d24d4204SJose E. Roman   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
782d24d4204SJose E. Roman   PetscFunctionReturn(0);
783d24d4204SJose E. Roman }
784d24d4204SJose E. Roman 
785d24d4204SJose E. Roman static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
786d24d4204SJose E. Roman {
787d24d4204SJose E. Roman   PetscErrorCode ierr;
788d24d4204SJose E. Roman 
789d24d4204SJose E. Roman   PetscFunctionBegin;
790d24d4204SJose E. Roman   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
791d24d4204SJose E. Roman   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
792d24d4204SJose E. Roman   PetscFunctionReturn(0);
793d24d4204SJose E. Roman }
794d24d4204SJose E. Roman 
795d24d4204SJose E. Roman static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
796d24d4204SJose E. Roman {
797d24d4204SJose E. Roman   PetscFunctionBegin;
798d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
799d24d4204SJose E. Roman   PetscFunctionReturn(0);
800d24d4204SJose E. Roman }
801d24d4204SJose E. Roman 
802d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
803d24d4204SJose E. Roman {
804d24d4204SJose E. Roman   PetscErrorCode ierr;
805d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
806d24d4204SJose E. Roman   PetscBLASInt   one=1,info;
807d24d4204SJose E. Roman 
808d24d4204SJose E. Roman   PetscFunctionBegin;
809d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
810d24d4204SJose E. Roman   PetscCheckScaLapackInfo("potrf",info);
811d24d4204SJose E. Roman   A->factortype = MAT_FACTOR_CHOLESKY;
812d24d4204SJose E. Roman   A->assembled  = PETSC_TRUE;
813d24d4204SJose E. Roman 
814d24d4204SJose E. Roman   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
815d24d4204SJose E. Roman   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
816d24d4204SJose E. Roman   PetscFunctionReturn(0);
817d24d4204SJose E. Roman }
818d24d4204SJose E. Roman 
819d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
820d24d4204SJose E. Roman {
821d24d4204SJose E. Roman   PetscErrorCode ierr;
822d24d4204SJose E. Roman 
823d24d4204SJose E. Roman   PetscFunctionBegin;
824d24d4204SJose E. Roman   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
825d24d4204SJose E. Roman   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
826d24d4204SJose E. Roman   PetscFunctionReturn(0);
827d24d4204SJose E. Roman }
828d24d4204SJose E. Roman 
829d24d4204SJose E. Roman static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
830d24d4204SJose E. Roman {
831d24d4204SJose E. Roman   PetscFunctionBegin;
832d24d4204SJose E. Roman   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
833d24d4204SJose E. Roman   PetscFunctionReturn(0);
834d24d4204SJose E. Roman }
835d24d4204SJose E. Roman 
836d24d4204SJose E. Roman PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
837d24d4204SJose E. Roman {
838d24d4204SJose E. Roman   PetscFunctionBegin;
839d24d4204SJose E. Roman   *type = MATSOLVERSCALAPACK;
840d24d4204SJose E. Roman   PetscFunctionReturn(0);
841d24d4204SJose E. Roman }
842d24d4204SJose E. Roman 
843d24d4204SJose E. Roman static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
844d24d4204SJose E. Roman {
845d24d4204SJose E. Roman   Mat            B;
846d24d4204SJose E. Roman   PetscErrorCode ierr;
847d24d4204SJose E. Roman 
848d24d4204SJose E. Roman   PetscFunctionBegin;
849d24d4204SJose E. Roman   /* Create the factorization matrix */
850d24d4204SJose E. Roman   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
851d24d4204SJose E. Roman   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
852d24d4204SJose E. Roman   ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
853d24d4204SJose E. Roman   ierr = MatSetUp(B);CHKERRQ(ierr);
854d24d4204SJose E. Roman   B->factortype = ftype;
855d24d4204SJose E. Roman   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
856d24d4204SJose E. Roman   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
857d24d4204SJose E. Roman 
858d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
859d24d4204SJose E. Roman   *F = B;
860d24d4204SJose E. Roman   PetscFunctionReturn(0);
861d24d4204SJose E. Roman }
862d24d4204SJose E. Roman 
863d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
864d24d4204SJose E. Roman {
865d24d4204SJose E. Roman   PetscErrorCode ierr;
866d24d4204SJose E. Roman 
867d24d4204SJose E. Roman   PetscFunctionBegin;
868d24d4204SJose E. Roman   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
869d24d4204SJose E. Roman   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
870d24d4204SJose E. Roman   PetscFunctionReturn(0);
871d24d4204SJose E. Roman }
872d24d4204SJose E. Roman 
873d24d4204SJose E. Roman static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
874d24d4204SJose E. Roman {
875d24d4204SJose E. Roman   PetscErrorCode ierr;
876d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
877d24d4204SJose E. Roman   PetscBLASInt   one=1,lwork=0;
878d24d4204SJose E. Roman   const char     *ntype;
879d68f4f38SPierre Jolivet   PetscScalar    *work=NULL,dummy;
880d24d4204SJose E. Roman 
881d24d4204SJose E. Roman   PetscFunctionBegin;
882d24d4204SJose E. Roman   switch (type){
883d24d4204SJose E. Roman     case NORM_1:
884d24d4204SJose E. Roman       ntype = "1";
885d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
886d24d4204SJose E. Roman       break;
887d24d4204SJose E. Roman     case NORM_FROBENIUS:
888d24d4204SJose E. Roman       ntype = "F";
889d24d4204SJose E. Roman       work  = &dummy;
890d24d4204SJose E. Roman       break;
891d24d4204SJose E. Roman     case NORM_INFINITY:
892d24d4204SJose E. Roman       ntype = "I";
893d24d4204SJose E. Roman       lwork = PetscMax(a->locr,a->locc);
894d24d4204SJose E. Roman       break;
895d24d4204SJose E. Roman     default:
896d24d4204SJose E. Roman       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
897d24d4204SJose E. Roman   }
898d24d4204SJose E. Roman   if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
899d24d4204SJose E. Roman   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
900d24d4204SJose E. Roman   if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
901d24d4204SJose E. Roman   PetscFunctionReturn(0);
902d24d4204SJose E. Roman }
903d24d4204SJose E. Roman 
904d24d4204SJose E. Roman static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
905d24d4204SJose E. Roman {
906d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907d24d4204SJose E. Roman   PetscErrorCode ierr;
908d24d4204SJose E. Roman 
909d24d4204SJose E. Roman   PetscFunctionBegin;
910d24d4204SJose E. Roman   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
911d24d4204SJose E. Roman   PetscFunctionReturn(0);
912d24d4204SJose E. Roman }
913d24d4204SJose E. Roman 
914d24d4204SJose E. Roman static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
915d24d4204SJose E. Roman {
916d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
917d24d4204SJose E. Roman   PetscErrorCode ierr;
918d24d4204SJose E. Roman   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
919d24d4204SJose E. Roman 
920d24d4204SJose E. Roman   PetscFunctionBegin;
921d24d4204SJose E. Roman   if (rows) {
922d24d4204SJose E. Roman     n     = a->locr;
923d24d4204SJose E. Roman     nb    = a->mb;
924d24d4204SJose E. Roman     isrc  = a->rsrc;
925d24d4204SJose E. Roman     nproc = a->grid->nprow;
926d24d4204SJose E. Roman     iproc = a->grid->myrow;
927d24d4204SJose E. Roman     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929d24d4204SJose E. Roman     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
930d24d4204SJose E. Roman   }
931d24d4204SJose E. Roman   if (cols) {
932d24d4204SJose E. Roman     n     = a->locc;
933d24d4204SJose E. Roman     nb    = a->nb;
934d24d4204SJose E. Roman     isrc  = a->csrc;
935d24d4204SJose E. Roman     nproc = a->grid->npcol;
936d24d4204SJose E. Roman     iproc = a->grid->mycol;
937d24d4204SJose E. Roman     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
938d24d4204SJose E. Roman     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
939d24d4204SJose E. Roman     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
940d24d4204SJose E. Roman   }
941d24d4204SJose E. Roman   PetscFunctionReturn(0);
942d24d4204SJose E. Roman }
943d24d4204SJose E. Roman 
944d24d4204SJose E. Roman static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
945d24d4204SJose E. Roman {
946d24d4204SJose E. Roman   PetscErrorCode ierr;
947d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
948d24d4204SJose E. Roman   Mat            Bmpi;
949d24d4204SJose E. Roman   MPI_Comm       comm;
9504b1a79daSJose E. Roman   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
9514b1a79daSJose E. Roman   const PetscInt *ranges,*branges,*cwork;
9524b1a79daSJose E. Roman   const PetscScalar *vwork;
953d24d4204SJose E. Roman   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
954d24d4204SJose E. Roman   PetscScalar    *barray;
9554b1a79daSJose E. Roman   PetscBool      differ=PETSC_FALSE;
9564b1a79daSJose E. Roman   PetscMPIInt    size;
957d24d4204SJose E. Roman 
958d24d4204SJose E. Roman   PetscFunctionBegin;
959d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9604b1a79daSJose E. Roman   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
9614b1a79daSJose E. Roman 
9624b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
9634b1a79daSJose E. Roman     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
9644b1a79daSJose E. Roman     ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
9654b1a79daSJose E. Roman     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
9664b1a79daSJose E. Roman   }
9674b1a79daSJose E. Roman 
9684b1a79daSJose E. Roman   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
9694b1a79daSJose E. Roman     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
9704b1a79daSJose E. Roman     m = PETSC_DECIDE;
9714b1a79daSJose E. Roman     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
9724b1a79daSJose E. Roman     n = PETSC_DECIDE;
9734b1a79daSJose E. Roman     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
9744b1a79daSJose E. Roman     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
9754b1a79daSJose E. Roman     ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
9764b1a79daSJose E. Roman     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
9774b1a79daSJose E. Roman 
9784b1a79daSJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
9794b1a79daSJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
9804b1a79daSJose E. Roman     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
9814b1a79daSJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
9824b1a79daSJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
9834b1a79daSJose E. Roman 
9844b1a79daSJose E. Roman     /* redistribute matrix */
9854b1a79daSJose E. Roman     ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
9864b1a79daSJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
9874b1a79daSJose E. Roman     ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
9884b1a79daSJose E. Roman     ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9894b1a79daSJose E. Roman     ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9904b1a79daSJose E. Roman 
9914b1a79daSJose E. Roman     /* transfer rows of auxiliary matrix to the final matrix B */
9924b1a79daSJose E. Roman     ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
9934b1a79daSJose E. Roman     for (i=rstart;i<rend;i++) {
9944b1a79daSJose E. Roman       ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
9954b1a79daSJose E. Roman       ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
9964b1a79daSJose E. Roman       ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
9974b1a79daSJose E. Roman     }
9984b1a79daSJose E. Roman     ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
9994b1a79daSJose E. Roman     ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10004b1a79daSJose E. Roman     ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);
10014b1a79daSJose E. Roman 
10024b1a79daSJose E. Roman   } else {  /* normal cases */
1003d24d4204SJose E. Roman 
1004d24d4204SJose E. Roman     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1005d24d4204SJose E. Roman     else {
1006d24d4204SJose E. Roman       ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
100792c846b4SJose E. Roman       m = PETSC_DECIDE;
100892c846b4SJose E. Roman       ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
100992c846b4SJose E. Roman       n = PETSC_DECIDE;
101092c846b4SJose E. Roman       ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
101192c846b4SJose E. Roman       ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1012d24d4204SJose E. Roman       ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
1013d24d4204SJose E. Roman       ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1014d24d4204SJose E. Roman     }
1015d24d4204SJose E. Roman 
1016d24d4204SJose E. Roman     /* create ScaLAPACK descriptor for B (1d block distribution) */
1017d24d4204SJose E. Roman     ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
1018d24d4204SJose E. Roman     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
1019d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
1020d24d4204SJose E. Roman     PetscCheckScaLapackInfo("descinit",info);
1021d24d4204SJose E. Roman 
1022d24d4204SJose E. Roman     /* redistribute matrix */
1023d24d4204SJose E. Roman     ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
1024d24d4204SJose E. Roman     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1025d24d4204SJose E. Roman     ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
1026d24d4204SJose E. Roman 
1027d24d4204SJose E. Roman     ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1028d24d4204SJose E. Roman     ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029d24d4204SJose E. Roman     if (reuse == MAT_INPLACE_MATRIX) {
1030d24d4204SJose E. Roman       ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1031d24d4204SJose E. Roman     } else *B = Bmpi;
10324b1a79daSJose E. Roman   }
1033d24d4204SJose E. Roman   PetscFunctionReturn(0);
1034d24d4204SJose E. Roman }
1035d24d4204SJose E. Roman 
1036d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1037d24d4204SJose E. Roman {
1038d24d4204SJose E. Roman   PetscErrorCode ierr;
1039d24d4204SJose E. Roman   Mat_ScaLAPACK  *b;
1040d24d4204SJose E. Roman   Mat            Bmpi;
1041d24d4204SJose E. Roman   MPI_Comm       comm;
104292c846b4SJose E. Roman   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
1043d24d4204SJose E. Roman   const PetscInt *ranges;
1044d24d4204SJose E. Roman   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
1045d24d4204SJose E. Roman   PetscScalar    *aarray;
10464e8b52a1SJose E. Roman   PetscInt       lda;
1047d24d4204SJose E. Roman 
1048d24d4204SJose E. Roman   PetscFunctionBegin;
1049d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1050d24d4204SJose E. Roman 
1051d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1052d24d4204SJose E. Roman   else {
1053d24d4204SJose E. Roman     ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
105492c846b4SJose E. Roman     m = PETSC_DECIDE;
105592c846b4SJose E. Roman     ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
105692c846b4SJose E. Roman     n = PETSC_DECIDE;
105792c846b4SJose E. Roman     ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
105892c846b4SJose E. Roman     ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
1059d24d4204SJose E. Roman     ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
1060d24d4204SJose E. Roman     ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
1061d24d4204SJose E. Roman   }
1062d24d4204SJose E. Roman   b = (Mat_ScaLAPACK*)Bmpi->data;
1063d24d4204SJose E. Roman 
1064d24d4204SJose E. Roman   /* create ScaLAPACK descriptor for A (1d block distribution) */
1065d24d4204SJose E. Roman   ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
1066d24d4204SJose E. Roman   ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
10674e8b52a1SJose E. Roman   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
10684e8b52a1SJose E. Roman   lld = PetscMax(lda,1);  /* local leading dimension */
1069d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1070d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1071d24d4204SJose E. Roman 
1072d24d4204SJose E. Roman   /* redistribute matrix */
1073d24d4204SJose E. Roman   ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
1074d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1075d24d4204SJose E. Roman   ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);
1076d24d4204SJose E. Roman 
1077d24d4204SJose E. Roman   ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1078d24d4204SJose E. Roman   ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1079d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) {
1080d24d4204SJose E. Roman     ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
1081d24d4204SJose E. Roman   } else *B = Bmpi;
1082d24d4204SJose E. Roman   PetscFunctionReturn(0);
1083d24d4204SJose E. Roman }
1084d24d4204SJose E. Roman 
1085d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1086d24d4204SJose E. Roman {
1087d24d4204SJose E. Roman   Mat               mat_scal;
1088d24d4204SJose E. Roman   PetscErrorCode    ierr;
1089d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1090d24d4204SJose E. Roman   const PetscInt    *cols;
1091d24d4204SJose E. Roman   const PetscScalar *vals;
1092d24d4204SJose E. Roman 
1093d24d4204SJose E. Roman   PetscFunctionBegin;
1094d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1095d24d4204SJose E. Roman     mat_scal = *newmat;
1096d24d4204SJose E. Roman     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1097d24d4204SJose E. Roman   } else {
1098d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1099d24d4204SJose E. Roman     m = PETSC_DECIDE;
1100d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1101d24d4204SJose E. Roman     n = PETSC_DECIDE;
1102d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1103d24d4204SJose E. Roman     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1104d24d4204SJose E. Roman     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1105d24d4204SJose E. Roman     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1106d24d4204SJose E. Roman   }
1107d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
1108d24d4204SJose E. Roman     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1109d24d4204SJose E. Roman     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1110d24d4204SJose E. Roman     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1111d24d4204SJose E. Roman   }
1112d24d4204SJose E. Roman   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1113d24d4204SJose E. Roman   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1114d24d4204SJose E. Roman 
1115d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1116d24d4204SJose E. Roman   else *newmat = mat_scal;
1117d24d4204SJose E. Roman   PetscFunctionReturn(0);
1118d24d4204SJose E. Roman }
1119d24d4204SJose E. Roman 
1120d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1121d24d4204SJose E. Roman {
1122d24d4204SJose E. Roman   Mat               mat_scal;
1123d24d4204SJose E. Roman   PetscErrorCode    ierr;
1124d24d4204SJose E. Roman   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1125d24d4204SJose E. Roman   const PetscInt    *cols;
1126d24d4204SJose E. Roman   const PetscScalar *vals;
1127d24d4204SJose E. Roman   PetscScalar       v;
1128d24d4204SJose E. Roman 
1129d24d4204SJose E. Roman   PetscFunctionBegin;
1130d24d4204SJose E. Roman   if (reuse == MAT_REUSE_MATRIX) {
1131d24d4204SJose E. Roman     mat_scal = *newmat;
1132d24d4204SJose E. Roman     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1133d24d4204SJose E. Roman   } else {
1134d24d4204SJose E. Roman     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1135d24d4204SJose E. Roman     m = PETSC_DECIDE;
1136d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1137d24d4204SJose E. Roman     n = PETSC_DECIDE;
1138d24d4204SJose E. Roman     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1139d24d4204SJose E. Roman     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1140d24d4204SJose E. Roman     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1141d24d4204SJose E. Roman     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1142d24d4204SJose E. Roman   }
1143d24d4204SJose E. Roman   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1144d24d4204SJose E. Roman   for (row=rstart;row<rend;row++) {
1145d24d4204SJose E. Roman     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1146d24d4204SJose E. Roman     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1147d24d4204SJose E. Roman     for (j=0;j<ncols;j++) { /* lower triangular part */
1148d24d4204SJose E. Roman       if (cols[j] == row) continue;
1149d24d4204SJose E. Roman       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1150d24d4204SJose E. Roman       ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1151d24d4204SJose E. Roman     }
1152d24d4204SJose E. Roman     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1153d24d4204SJose E. Roman   }
1154d24d4204SJose E. Roman   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1155d24d4204SJose E. Roman   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1156d24d4204SJose E. Roman   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1157d24d4204SJose E. Roman 
1158d24d4204SJose E. Roman   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1159d24d4204SJose E. Roman   else *newmat = mat_scal;
1160d24d4204SJose E. Roman   PetscFunctionReturn(0);
1161d24d4204SJose E. Roman }
1162d24d4204SJose E. Roman 
1163d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1164d24d4204SJose E. Roman {
1165d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1166d24d4204SJose E. Roman   PetscErrorCode ierr;
1167d24d4204SJose E. Roman   PetscInt       sz=0;
1168d24d4204SJose E. Roman 
1169d24d4204SJose E. Roman   PetscFunctionBegin;
1170d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1171d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1172d24d4204SJose E. Roman   if (!a->lld) a->lld = a->locr;
1173d24d4204SJose E. Roman 
1174d24d4204SJose E. Roman   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1175d24d4204SJose E. Roman   ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1176d24d4204SJose E. Roman   ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1177d24d4204SJose E. Roman   ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1178d24d4204SJose E. Roman 
1179d24d4204SJose E. Roman   A->preallocated = PETSC_TRUE;
1180d24d4204SJose E. Roman   PetscFunctionReturn(0);
1181d24d4204SJose E. Roman }
1182d24d4204SJose E. Roman 
1183d24d4204SJose E. Roman static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1184d24d4204SJose E. Roman {
1185d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1186d24d4204SJose E. Roman   PetscErrorCode     ierr;
1187d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1188d24d4204SJose E. Roman   PetscBool          flg;
1189d24d4204SJose E. Roman   MPI_Comm           icomm;
1190d24d4204SJose E. Roman 
1191d24d4204SJose E. Roman   PetscFunctionBegin;
1192d24d4204SJose E. Roman   ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1193d24d4204SJose E. Roman   ierr = PetscFree(a->loc);CHKERRQ(ierr);
1194d24d4204SJose E. Roman   ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1195d24d4204SJose E. Roman   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1196d24d4204SJose E. Roman   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1197d24d4204SJose E. Roman   if (--grid->grid_refct == 0) {
1198d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxt);
1199d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxrow);
1200d24d4204SJose E. Roman     Cblacs_gridexit(grid->ictxcol);
1201d24d4204SJose E. Roman     ierr = PetscFree(grid);CHKERRQ(ierr);
1202*f7ec113fSDamian Marek     ierr = MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1203d24d4204SJose E. Roman   }
1204d24d4204SJose E. Roman   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1205d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1206d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1207d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1208d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1209d24d4204SJose E. Roman   ierr = PetscFree(A->data);CHKERRQ(ierr);
1210d24d4204SJose E. Roman   PetscFunctionReturn(0);
1211d24d4204SJose E. Roman }
1212d24d4204SJose E. Roman 
1213d24d4204SJose E. Roman PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1214d24d4204SJose E. Roman {
1215d24d4204SJose E. Roman   PetscErrorCode ierr;
1216d24d4204SJose E. Roman   const PetscInt *ranges;
1217d24d4204SJose E. Roman   PetscMPIInt    size;
1218d24d4204SJose E. Roman   PetscInt       i,n;
1219d24d4204SJose E. Roman 
1220d24d4204SJose E. Roman   PetscFunctionBegin;
1221d24d4204SJose E. Roman   ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
1222d24d4204SJose E. Roman   if (size>2) {
1223d24d4204SJose E. Roman     ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
1224d24d4204SJose E. Roman     n = ranges[1]-ranges[0];
1225d24d4204SJose E. Roman     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1226d24d4204SJose E. Roman     if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1227d24d4204SJose E. Roman   }
1228d24d4204SJose E. Roman   PetscFunctionReturn(0);
1229d24d4204SJose E. Roman }
1230d24d4204SJose E. Roman 
1231d24d4204SJose E. Roman PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1232d24d4204SJose E. Roman {
1233d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1234d24d4204SJose E. Roman   PetscErrorCode ierr;
1235d24d4204SJose E. Roman   PetscBLASInt   info=0;
1236d24d4204SJose E. Roman 
1237d24d4204SJose E. Roman   PetscFunctionBegin;
1238d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1239d24d4204SJose E. Roman   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1240d24d4204SJose E. Roman 
1241d24d4204SJose E. Roman   /* check that the layout is as enforced by MatCreateScaLAPACK */
1242d24d4204SJose E. Roman   ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
1243d24d4204SJose E. Roman   ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);
1244d24d4204SJose E. Roman 
1245d24d4204SJose E. Roman   /* compute local sizes */
1246d24d4204SJose E. Roman   ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
1247d24d4204SJose E. Roman   ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
1248d24d4204SJose E. Roman   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1249d24d4204SJose E. Roman   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1250d24d4204SJose E. Roman   a->lld  = PetscMax(1,a->locr);
1251d24d4204SJose E. Roman 
1252d24d4204SJose E. Roman   /* allocate local array */
1253d24d4204SJose E. Roman   ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);
1254d24d4204SJose E. Roman 
1255d24d4204SJose E. Roman   /* set up ScaLAPACK descriptor */
1256d24d4204SJose E. Roman   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1257d24d4204SJose E. Roman   PetscCheckScaLapackInfo("descinit",info);
1258d24d4204SJose E. Roman   PetscFunctionReturn(0);
1259d24d4204SJose E. Roman }
1260d24d4204SJose E. Roman 
1261d24d4204SJose E. Roman PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1262d24d4204SJose E. Roman {
1263d24d4204SJose E. Roman   PetscErrorCode ierr;
1264d24d4204SJose E. Roman   PetscInt       nstash,reallocs;
1265d24d4204SJose E. Roman 
1266d24d4204SJose E. Roman   PetscFunctionBegin;
1267d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1268d24d4204SJose E. Roman   ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1269d24d4204SJose E. Roman   ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1270d24d4204SJose E. Roman   ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1271d24d4204SJose E. Roman   PetscFunctionReturn(0);
1272d24d4204SJose E. Roman }
1273d24d4204SJose E. Roman 
1274d24d4204SJose E. Roman PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1275d24d4204SJose E. Roman {
1276d24d4204SJose E. Roman   PetscErrorCode ierr;
1277d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1278d24d4204SJose E. Roman   PetscMPIInt    n;
1279d24d4204SJose E. Roman   PetscInt       i,flg,*row,*col;
1280d24d4204SJose E. Roman   PetscScalar    *val;
1281d24d4204SJose E. Roman   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;
1282d24d4204SJose E. Roman 
1283d24d4204SJose E. Roman   PetscFunctionBegin;
1284d24d4204SJose E. Roman   if (A->nooffprocentries) PetscFunctionReturn(0);
1285d24d4204SJose E. Roman   while (1) {
1286d24d4204SJose E. Roman     ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1287d24d4204SJose E. Roman     if (!flg) break;
1288d24d4204SJose E. Roman     for (i=0;i<n;i++) {
1289d24d4204SJose E. Roman       ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
1290d24d4204SJose E. Roman       ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
1291d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1292d24d4204SJose E. Roman       if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1293d24d4204SJose E. Roman       switch (A->insertmode) {
1294d24d4204SJose E. Roman         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1295d24d4204SJose E. Roman         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1296d24d4204SJose E. Roman         default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1297d24d4204SJose E. Roman       }
1298d24d4204SJose E. Roman     }
1299d24d4204SJose E. Roman   }
1300d24d4204SJose E. Roman   ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
1301d24d4204SJose E. Roman   PetscFunctionReturn(0);
1302d24d4204SJose E. Roman }
1303d24d4204SJose E. Roman 
1304d24d4204SJose E. Roman PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1305d24d4204SJose E. Roman {
1306d24d4204SJose E. Roman   PetscErrorCode ierr;
1307d24d4204SJose E. Roman   Mat            Adense,As;
1308d24d4204SJose E. Roman   MPI_Comm       comm;
1309d24d4204SJose E. Roman 
1310d24d4204SJose E. Roman   PetscFunctionBegin;
1311d24d4204SJose E. Roman   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1312d24d4204SJose E. Roman   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1313d24d4204SJose E. Roman   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1314d24d4204SJose E. Roman   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1315d24d4204SJose E. Roman   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1316d24d4204SJose E. Roman   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1317d24d4204SJose E. Roman   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1318d24d4204SJose E. Roman   PetscFunctionReturn(0);
1319d24d4204SJose E. Roman }
1320d24d4204SJose E. Roman 
1321d24d4204SJose E. Roman /* -------------------------------------------------------------------*/
1322d24d4204SJose E. Roman static struct _MatOps MatOps_Values = {
1323d24d4204SJose E. Roman        MatSetValues_ScaLAPACK,
1324d24d4204SJose E. Roman        0,
1325d24d4204SJose E. Roman        0,
1326d24d4204SJose E. Roman        MatMult_ScaLAPACK,
1327d24d4204SJose E. Roman /* 4*/ MatMultAdd_ScaLAPACK,
1328d24d4204SJose E. Roman        MatMultTranspose_ScaLAPACK,
1329d24d4204SJose E. Roman        MatMultTransposeAdd_ScaLAPACK,
1330d24d4204SJose E. Roman        MatSolve_ScaLAPACK,
1331d24d4204SJose E. Roman        MatSolveAdd_ScaLAPACK,
1332d24d4204SJose E. Roman        0,
1333d24d4204SJose E. Roman /*10*/ 0,
1334d24d4204SJose E. Roman        MatLUFactor_ScaLAPACK,
1335d24d4204SJose E. Roman        MatCholeskyFactor_ScaLAPACK,
1336d24d4204SJose E. Roman        0,
1337d24d4204SJose E. Roman        MatTranspose_ScaLAPACK,
1338d24d4204SJose E. Roman /*15*/ MatGetInfo_ScaLAPACK,
1339d24d4204SJose E. Roman        0,
1340d24d4204SJose E. Roman        MatGetDiagonal_ScaLAPACK,
1341d24d4204SJose E. Roman        MatDiagonalScale_ScaLAPACK,
1342d24d4204SJose E. Roman        MatNorm_ScaLAPACK,
1343d24d4204SJose E. Roman /*20*/ MatAssemblyBegin_ScaLAPACK,
1344d24d4204SJose E. Roman        MatAssemblyEnd_ScaLAPACK,
1345d24d4204SJose E. Roman        MatSetOption_ScaLAPACK,
1346d24d4204SJose E. Roman        MatZeroEntries_ScaLAPACK,
1347d24d4204SJose E. Roman /*24*/ 0,
1348d24d4204SJose E. Roman        MatLUFactorSymbolic_ScaLAPACK,
1349d24d4204SJose E. Roman        MatLUFactorNumeric_ScaLAPACK,
1350d24d4204SJose E. Roman        MatCholeskyFactorSymbolic_ScaLAPACK,
1351d24d4204SJose E. Roman        MatCholeskyFactorNumeric_ScaLAPACK,
1352d24d4204SJose E. Roman /*29*/ MatSetUp_ScaLAPACK,
1353d24d4204SJose E. Roman        0,
1354d24d4204SJose E. Roman        0,
1355d24d4204SJose E. Roman        0,
1356d24d4204SJose E. Roman        0,
1357d24d4204SJose E. Roman /*34*/ MatDuplicate_ScaLAPACK,
1358d24d4204SJose E. Roman        0,
1359d24d4204SJose E. Roman        0,
1360d24d4204SJose E. Roman        0,
1361d24d4204SJose E. Roman        0,
1362d24d4204SJose E. Roman /*39*/ MatAXPY_ScaLAPACK,
1363d24d4204SJose E. Roman        0,
1364d24d4204SJose E. Roman        0,
1365d24d4204SJose E. Roman        0,
1366d24d4204SJose E. Roman        MatCopy_ScaLAPACK,
1367d24d4204SJose E. Roman /*44*/ 0,
1368d24d4204SJose E. Roman        MatScale_ScaLAPACK,
1369d24d4204SJose E. Roman        MatShift_ScaLAPACK,
1370d24d4204SJose E. Roman        0,
1371d24d4204SJose E. Roman        0,
1372d24d4204SJose E. Roman /*49*/ 0,
1373d24d4204SJose E. Roman        0,
1374d24d4204SJose E. Roman        0,
1375d24d4204SJose E. Roman        0,
1376d24d4204SJose E. Roman        0,
1377d24d4204SJose E. Roman /*54*/ 0,
1378d24d4204SJose E. Roman        0,
1379d24d4204SJose E. Roman        0,
1380d24d4204SJose E. Roman        0,
1381d24d4204SJose E. Roman        0,
1382d24d4204SJose E. Roman /*59*/ 0,
1383d24d4204SJose E. Roman        MatDestroy_ScaLAPACK,
1384d24d4204SJose E. Roman        MatView_ScaLAPACK,
1385d24d4204SJose E. Roman        0,
1386d24d4204SJose E. Roman        0,
1387d24d4204SJose E. Roman /*64*/ 0,
1388d24d4204SJose E. Roman        0,
1389d24d4204SJose E. Roman        0,
1390d24d4204SJose E. Roman        0,
1391d24d4204SJose E. Roman        0,
1392d24d4204SJose E. Roman /*69*/ 0,
1393d24d4204SJose E. Roman        0,
1394d24d4204SJose E. Roman        MatConvert_ScaLAPACK_Dense,
1395d24d4204SJose E. Roman        0,
1396d24d4204SJose E. Roman        0,
1397d24d4204SJose E. Roman /*74*/ 0,
1398d24d4204SJose E. Roman        0,
1399d24d4204SJose E. Roman        0,
1400d24d4204SJose E. Roman        0,
1401d24d4204SJose E. Roman        0,
1402d24d4204SJose E. Roman /*79*/ 0,
1403d24d4204SJose E. Roman        0,
1404d24d4204SJose E. Roman        0,
1405d24d4204SJose E. Roman        0,
1406d24d4204SJose E. Roman        MatLoad_ScaLAPACK,
1407d24d4204SJose E. Roman /*84*/ 0,
1408d24d4204SJose E. Roman        0,
1409d24d4204SJose E. Roman        0,
1410d24d4204SJose E. Roman        0,
1411d24d4204SJose E. Roman        0,
1412d24d4204SJose E. Roman /*89*/ 0,
1413d24d4204SJose E. Roman        0,
1414d24d4204SJose E. Roman        MatMatMultNumeric_ScaLAPACK,
1415d24d4204SJose E. Roman        0,
1416d24d4204SJose E. Roman        0,
1417d24d4204SJose E. Roman /*94*/ 0,
1418d24d4204SJose E. Roman        0,
1419d24d4204SJose E. Roman        0,
1420d24d4204SJose E. Roman        MatMatTransposeMultNumeric_ScaLAPACK,
1421d24d4204SJose E. Roman        0,
1422d24d4204SJose E. Roman /*99*/ MatProductSetFromOptions_ScaLAPACK,
1423d24d4204SJose E. Roman        0,
1424d24d4204SJose E. Roman        0,
1425d24d4204SJose E. Roman        MatConjugate_ScaLAPACK,
1426d24d4204SJose E. Roman        0,
1427d24d4204SJose E. Roman /*104*/0,
1428d24d4204SJose E. Roman        0,
1429d24d4204SJose E. Roman        0,
1430d24d4204SJose E. Roman        0,
1431d24d4204SJose E. Roman        0,
1432d24d4204SJose E. Roman /*109*/MatMatSolve_ScaLAPACK,
1433d24d4204SJose E. Roman        0,
1434d24d4204SJose E. Roman        0,
1435d24d4204SJose E. Roman        0,
1436d24d4204SJose E. Roman        MatMissingDiagonal_ScaLAPACK,
1437d24d4204SJose E. Roman /*114*/0,
1438d24d4204SJose E. Roman        0,
1439d24d4204SJose E. Roman        0,
1440d24d4204SJose E. Roman        0,
1441d24d4204SJose E. Roman        0,
1442d24d4204SJose E. Roman /*119*/0,
1443d24d4204SJose E. Roman        MatHermitianTranspose_ScaLAPACK,
1444d24d4204SJose E. Roman        0,
1445d24d4204SJose E. Roman        0,
1446d24d4204SJose E. Roman        0,
1447d24d4204SJose E. Roman /*124*/0,
1448d24d4204SJose E. Roman        0,
1449d24d4204SJose E. Roman        0,
1450d24d4204SJose E. Roman        0,
1451d24d4204SJose E. Roman        0,
1452d24d4204SJose E. Roman /*129*/0,
1453d24d4204SJose E. Roman        0,
1454d24d4204SJose E. Roman        0,
1455d24d4204SJose E. Roman        0,
1456d24d4204SJose E. Roman        0,
1457d24d4204SJose E. Roman /*134*/0,
1458d24d4204SJose E. Roman        0,
1459d24d4204SJose E. Roman        0,
1460d24d4204SJose E. Roman        0,
1461d24d4204SJose E. Roman        0,
1462d24d4204SJose E. Roman        0,
1463d24d4204SJose E. Roman /*140*/0,
1464d24d4204SJose E. Roman        0,
1465d24d4204SJose E. Roman        0,
1466d24d4204SJose E. Roman        0,
1467d24d4204SJose E. Roman        0,
1468d24d4204SJose E. Roman /*145*/0,
1469d24d4204SJose E. Roman        0,
1470d24d4204SJose E. Roman        0
1471d24d4204SJose E. Roman };
1472d24d4204SJose E. Roman 
1473d24d4204SJose E. Roman static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1474d24d4204SJose E. Roman {
1475d24d4204SJose E. Roman   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1476d24d4204SJose E. Roman   PetscInt           size=stash->size,nsends;
1477d24d4204SJose E. Roman   PetscErrorCode     ierr;
1478d24d4204SJose E. Roman   PetscInt           count,*sindices,**rindices,i,j,l;
1479d24d4204SJose E. Roman   PetscScalar        **rvalues,*svalues;
1480d24d4204SJose E. Roman   MPI_Comm           comm = stash->comm;
1481d24d4204SJose E. Roman   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1482d24d4204SJose E. Roman   PetscMPIInt        *sizes,*nlengths,nreceives;
1483d24d4204SJose E. Roman   PetscInt           *sp_idx,*sp_idy;
1484d24d4204SJose E. Roman   PetscScalar        *sp_val;
1485d24d4204SJose E. Roman   PetscMatStashSpace space,space_next;
1486d24d4204SJose E. Roman   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1487d24d4204SJose E. Roman   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;
1488d24d4204SJose E. Roman 
1489d24d4204SJose E. Roman   PetscFunctionBegin;
1490d24d4204SJose E. Roman   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1491d24d4204SJose E. Roman     InsertMode addv;
1492d24d4204SJose E. Roman     ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1493d24d4204SJose E. Roman     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1494d24d4204SJose E. Roman     mat->insertmode = addv; /* in case this processor had no cache */
1495d24d4204SJose E. Roman   }
1496d24d4204SJose E. Roman 
1497d24d4204SJose E. Roman   bs2 = stash->bs*stash->bs;
1498d24d4204SJose E. Roman 
1499d24d4204SJose E. Roman   /*  first count number of contributors to each processor */
1500d24d4204SJose E. Roman   ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
1501d24d4204SJose E. Roman   ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);
1502d24d4204SJose E. Roman 
1503d24d4204SJose E. Roman   i     = j    = 0;
1504d24d4204SJose E. Roman   space = stash->space_head;
1505d24d4204SJose E. Roman   while (space) {
1506d24d4204SJose E. Roman     space_next = space->next;
1507d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
1508d24d4204SJose E. Roman       ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
1509d24d4204SJose E. Roman       ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
1510d24d4204SJose E. Roman       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1511d24d4204SJose E. Roman       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1512d24d4204SJose E. Roman       nlengths[j]++; owner[i] = j;
1513d24d4204SJose E. Roman       i++;
1514d24d4204SJose E. Roman     }
1515d24d4204SJose E. Roman     space = space_next;
1516d24d4204SJose E. Roman   }
1517d24d4204SJose E. Roman 
1518d24d4204SJose E. Roman   /* Now check what procs get messages - and compute nsends. */
1519d24d4204SJose E. Roman   ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
1520d24d4204SJose E. Roman   for (i=0, nsends=0; i<size; i++) {
1521d24d4204SJose E. Roman     if (nlengths[i]) {
1522d24d4204SJose E. Roman       sizes[i] = 1; nsends++;
1523d24d4204SJose E. Roman     }
1524d24d4204SJose E. Roman   }
1525d24d4204SJose E. Roman 
1526d24d4204SJose E. Roman   {PetscMPIInt *onodes,*olengths;
1527d24d4204SJose E. Roman    /* Determine the number of messages to expect, their lengths, from from-ids */
1528d24d4204SJose E. Roman    ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
1529d24d4204SJose E. Roman    ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
1530d24d4204SJose E. Roman    /* since clubbing row,col - lengths are multiplied by 2 */
1531d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] *=2;
1532d24d4204SJose E. Roman    ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
1533d24d4204SJose E. Roman    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1534d24d4204SJose E. Roman    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1535d24d4204SJose E. Roman    ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
1536d24d4204SJose E. Roman    ierr = PetscFree(onodes);CHKERRQ(ierr);
1537d24d4204SJose E. Roman    ierr = PetscFree(olengths);CHKERRQ(ierr);}
1538d24d4204SJose E. Roman 
1539d24d4204SJose E. Roman   /* do sends:
1540d24d4204SJose E. Roman       1) starts[i] gives the starting index in svalues for stuff going to
1541d24d4204SJose E. Roman          the ith processor
1542d24d4204SJose E. Roman   */
1543d24d4204SJose E. Roman   ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
1544d24d4204SJose E. Roman   ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
1545d24d4204SJose E. Roman   ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
1546d24d4204SJose E. Roman   /* use 2 sends the first with all_a, the next with all_i and all_j */
1547d24d4204SJose E. Roman   startv[0] = 0; starti[0] = 0;
1548d24d4204SJose E. Roman   for (i=1; i<size; i++) {
1549d24d4204SJose E. Roman     startv[i] = startv[i-1] + nlengths[i-1];
1550d24d4204SJose E. Roman     starti[i] = starti[i-1] + 2*nlengths[i-1];
1551d24d4204SJose E. Roman   }
1552d24d4204SJose E. Roman 
1553d24d4204SJose E. Roman   i     = 0;
1554d24d4204SJose E. Roman   space = stash->space_head;
1555d24d4204SJose E. Roman   while (space) {
1556d24d4204SJose E. Roman     space_next = space->next;
1557d24d4204SJose E. Roman     sp_idx     = space->idx;
1558d24d4204SJose E. Roman     sp_idy     = space->idy;
1559d24d4204SJose E. Roman     sp_val     = space->val;
1560d24d4204SJose E. Roman     for (l=0; l<space->local_used; l++) {
1561d24d4204SJose E. Roman       j = owner[i];
1562d24d4204SJose E. Roman       if (bs2 == 1) {
1563d24d4204SJose E. Roman         svalues[startv[j]] = sp_val[l];
1564d24d4204SJose E. Roman       } else {
1565d24d4204SJose E. Roman         PetscInt    k;
1566d24d4204SJose E. Roman         PetscScalar *buf1,*buf2;
1567d24d4204SJose E. Roman         buf1 = svalues+bs2*startv[j];
1568d24d4204SJose E. Roman         buf2 = space->val + bs2*l;
1569d24d4204SJose E. Roman         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1570d24d4204SJose E. Roman       }
1571d24d4204SJose E. Roman       sindices[starti[j]]             = sp_idx[l];
1572d24d4204SJose E. Roman       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1573d24d4204SJose E. Roman       startv[j]++;
1574d24d4204SJose E. Roman       starti[j]++;
1575d24d4204SJose E. Roman       i++;
1576d24d4204SJose E. Roman     }
1577d24d4204SJose E. Roman     space = space_next;
1578d24d4204SJose E. Roman   }
1579d24d4204SJose E. Roman   startv[0] = 0;
1580d24d4204SJose E. Roman   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1581d24d4204SJose E. Roman 
1582d24d4204SJose E. Roman   for (i=0,count=0; i<size; i++) {
1583d24d4204SJose E. Roman     if (sizes[i]) {
1584d24d4204SJose E. Roman       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
1585d24d4204SJose E. Roman       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
1586d24d4204SJose E. Roman     }
1587d24d4204SJose E. Roman   }
1588d24d4204SJose E. Roman #if defined(PETSC_USE_INFO)
1589d24d4204SJose E. Roman   ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
1590d24d4204SJose E. Roman   for (i=0; i<size; i++) {
1591d24d4204SJose E. Roman     if (sizes[i]) {
1592d24d4204SJose E. Roman       ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1593d24d4204SJose E. Roman     }
1594d24d4204SJose E. Roman   }
1595d24d4204SJose E. Roman #endif
1596d24d4204SJose E. Roman   ierr = PetscFree(nlengths);CHKERRQ(ierr);
1597d24d4204SJose E. Roman   ierr = PetscFree(owner);CHKERRQ(ierr);
1598d24d4204SJose E. Roman   ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
1599d24d4204SJose E. Roman   ierr = PetscFree(sizes);CHKERRQ(ierr);
1600d24d4204SJose E. Roman 
1601d24d4204SJose E. Roman   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1602d24d4204SJose E. Roman   ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);
1603d24d4204SJose E. Roman 
1604d24d4204SJose E. Roman   for (i=0; i<nreceives; i++) {
1605d24d4204SJose E. Roman     recv_waits[2*i]   = recv_waits1[i];
1606d24d4204SJose E. Roman     recv_waits[2*i+1] = recv_waits2[i];
1607d24d4204SJose E. Roman   }
1608d24d4204SJose E. Roman   stash->recv_waits = recv_waits;
1609d24d4204SJose E. Roman 
1610d24d4204SJose E. Roman   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
1611d24d4204SJose E. Roman   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
1612d24d4204SJose E. Roman 
1613d24d4204SJose E. Roman   stash->svalues         = svalues;
1614d24d4204SJose E. Roman   stash->sindices        = sindices;
1615d24d4204SJose E. Roman   stash->rvalues         = rvalues;
1616d24d4204SJose E. Roman   stash->rindices        = rindices;
1617d24d4204SJose E. Roman   stash->send_waits      = send_waits;
1618d24d4204SJose E. Roman   stash->nsends          = nsends;
1619d24d4204SJose E. Roman   stash->nrecvs          = nreceives;
1620d24d4204SJose E. Roman   stash->reproduce_count = 0;
1621d24d4204SJose E. Roman   PetscFunctionReturn(0);
1622d24d4204SJose E. Roman }
1623d24d4204SJose E. Roman 
1624d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1625d24d4204SJose E. Roman {
1626d24d4204SJose E. Roman   PetscErrorCode ierr;
1627d24d4204SJose E. Roman   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1628d24d4204SJose E. Roman 
1629d24d4204SJose E. Roman   PetscFunctionBegin;
1630d24d4204SJose E. Roman   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1631d24d4204SJose E. Roman   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1632d24d4204SJose E. Roman   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1633d24d4204SJose E. Roman   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1634d24d4204SJose E. Roman   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1635d24d4204SJose E. Roman   PetscFunctionReturn(0);
1636d24d4204SJose E. Roman }
1637d24d4204SJose E. Roman 
1638d24d4204SJose E. Roman /*@
1639d24d4204SJose E. Roman    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1640d24d4204SJose E. Roman    the ScaLAPACK matrix
1641d24d4204SJose E. Roman 
1642d24d4204SJose E. Roman    Logically Collective on A
1643d24d4204SJose E. Roman 
1644d24d4204SJose E. Roman    Input Parameter:
1645d24d4204SJose E. Roman +  A  - a MATSCALAPACK matrix
1646d24d4204SJose E. Roman .  mb - the row block size
1647d24d4204SJose E. Roman -  nb - the column block size
1648d24d4204SJose E. Roman 
1649d24d4204SJose E. Roman    Level: intermediate
1650d24d4204SJose E. Roman 
1651d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1652d24d4204SJose E. Roman @*/
1653d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1654d24d4204SJose E. Roman {
1655d24d4204SJose E. Roman   PetscErrorCode ierr;
1656d24d4204SJose E. Roman 
1657d24d4204SJose E. Roman   PetscFunctionBegin;
1658d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1659d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,mb,2);
1660d24d4204SJose E. Roman   PetscValidLogicalCollectiveInt(A,nb,3);
1661d24d4204SJose E. Roman   ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
1662d24d4204SJose E. Roman   PetscFunctionReturn(0);
1663d24d4204SJose E. Roman }
1664d24d4204SJose E. Roman 
1665d24d4204SJose E. Roman static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1666d24d4204SJose E. Roman {
1667d24d4204SJose E. Roman   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1668d24d4204SJose E. Roman 
1669d24d4204SJose E. Roman   PetscFunctionBegin;
1670d24d4204SJose E. Roman   if (mb) *mb = a->mb;
1671d24d4204SJose E. Roman   if (nb) *nb = a->nb;
1672d24d4204SJose E. Roman   PetscFunctionReturn(0);
1673d24d4204SJose E. Roman }
1674d24d4204SJose E. Roman 
1675d24d4204SJose E. Roman /*@
1676d24d4204SJose E. Roman    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1677d24d4204SJose E. Roman    the ScaLAPACK matrix
1678d24d4204SJose E. Roman 
1679d24d4204SJose E. Roman    Not collective
1680d24d4204SJose E. Roman 
1681d24d4204SJose E. Roman    Input Parameter:
1682d24d4204SJose E. Roman .  A  - a MATSCALAPACK matrix
1683d24d4204SJose E. Roman 
1684d24d4204SJose E. Roman    Output Parameters:
1685d24d4204SJose E. Roman +  mb - the row block size
1686d24d4204SJose E. Roman -  nb - the column block size
1687d24d4204SJose E. Roman 
1688d24d4204SJose E. Roman    Level: intermediate
1689d24d4204SJose E. Roman 
1690d24d4204SJose E. Roman .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1691d24d4204SJose E. Roman @*/
1692d24d4204SJose E. Roman PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1693d24d4204SJose E. Roman {
1694d24d4204SJose E. Roman   PetscErrorCode ierr;
1695d24d4204SJose E. Roman 
1696d24d4204SJose E. Roman   PetscFunctionBegin;
1697d24d4204SJose E. Roman   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1698d24d4204SJose E. Roman   ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
1699d24d4204SJose E. Roman   PetscFunctionReturn(0);
1700d24d4204SJose E. Roman }
1701d24d4204SJose E. Roman 
1702d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1703d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1704d24d4204SJose E. Roman 
1705d24d4204SJose E. Roman /*MC
1706d24d4204SJose E. Roman    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1707d24d4204SJose E. Roman 
1708d24d4204SJose E. Roman    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1709d24d4204SJose E. Roman 
1710d24d4204SJose E. Roman    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1711d24d4204SJose E. Roman 
1712d24d4204SJose E. Roman    Options Database Keys:
1713d24d4204SJose E. Roman +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1714d24d4204SJose E. Roman .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1715d24d4204SJose E. Roman -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1716d24d4204SJose E. Roman 
1717d24d4204SJose E. Roman    Level: beginner
1718d24d4204SJose E. Roman 
1719d24d4204SJose E. Roman .seealso: MATDENSE, MATELEMENTAL
1720d24d4204SJose E. Roman M*/
1721d24d4204SJose E. Roman 
1722d24d4204SJose E. Roman PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1723d24d4204SJose E. Roman {
1724d24d4204SJose E. Roman   Mat_ScaLAPACK      *a;
1725d24d4204SJose E. Roman   PetscErrorCode     ierr;
1726d24d4204SJose E. Roman   PetscBool          flg,flg1;
1727d24d4204SJose E. Roman   Mat_ScaLAPACK_Grid *grid;
1728d24d4204SJose E. Roman   MPI_Comm           icomm;
1729d24d4204SJose E. Roman   PetscBLASInt       nprow,npcol,myrow,mycol;
1730d24d4204SJose E. Roman   PetscInt           optv1,k=2,array[2]={0,0};
1731d24d4204SJose E. Roman   PetscMPIInt        size;
1732d24d4204SJose E. Roman 
1733d24d4204SJose E. Roman   PetscFunctionBegin;
1734d24d4204SJose E. Roman   ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1735d24d4204SJose E. Roman   A->insertmode = NOT_SET_VALUES;
1736d24d4204SJose E. Roman 
1737d24d4204SJose E. Roman   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1738d24d4204SJose E. Roman   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1739d24d4204SJose E. Roman   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1740d24d4204SJose E. Roman   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1741d24d4204SJose E. Roman   A->stash.ScatterDestroy = NULL;
1742d24d4204SJose E. Roman 
1743d24d4204SJose E. Roman   ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1744d24d4204SJose E. Roman   A->data = (void*)a;
1745d24d4204SJose E. Roman 
1746d24d4204SJose E. Roman   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1747d24d4204SJose E. Roman   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1748d24d4204SJose E. Roman     ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1749*f7ec113fSDamian Marek     ierr = PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);CHKERRQ(ierr);
1750d24d4204SJose E. Roman   }
1751d24d4204SJose E. Roman   ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1752d24d4204SJose E. Roman   ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1753d24d4204SJose E. Roman   if (!flg) {
1754d24d4204SJose E. Roman     ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1755d24d4204SJose E. Roman 
1756d24d4204SJose E. Roman     ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1757d24d4204SJose E. Roman     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1758d24d4204SJose E. Roman 
1759d24d4204SJose E. Roman     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1760d24d4204SJose E. Roman     ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1761d24d4204SJose E. Roman     if (flg1) {
1762d24d4204SJose E. Roman       if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1763d24d4204SJose E. Roman       grid->nprow = optv1;
1764d24d4204SJose E. Roman     }
1765d24d4204SJose E. Roman     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1766d24d4204SJose E. Roman 
1767d24d4204SJose E. Roman     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1768d24d4204SJose E. Roman     grid->npcol = size/grid->nprow;
1769d24d4204SJose E. Roman     ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1770d24d4204SJose E. Roman     ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1771*f7ec113fSDamian Marek     grid->ictxt = Csys2blacs_handle(icomm);
1772d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1773d24d4204SJose E. Roman     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1774d24d4204SJose E. Roman     grid->grid_refct = 1;
1775d24d4204SJose E. Roman     grid->nprow      = nprow;
1776d24d4204SJose E. Roman     grid->npcol      = npcol;
1777d24d4204SJose E. Roman     grid->myrow      = myrow;
1778d24d4204SJose E. Roman     grid->mycol      = mycol;
1779d24d4204SJose E. Roman     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1780*f7ec113fSDamian Marek     grid->ictxrow = Csys2blacs_handle(icomm);
1781d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1782*f7ec113fSDamian Marek     grid->ictxcol = Csys2blacs_handle(icomm);
1783d24d4204SJose E. Roman     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1784d24d4204SJose E. Roman     ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1785d24d4204SJose E. Roman 
1786d24d4204SJose E. Roman   } else grid->grid_refct++;
1787d24d4204SJose E. Roman   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1788d24d4204SJose E. Roman   a->grid = grid;
1789d24d4204SJose E. Roman   a->mb   = DEFAULT_BLOCKSIZE;
1790d24d4204SJose E. Roman   a->nb   = DEFAULT_BLOCKSIZE;
1791d24d4204SJose E. Roman 
1792d24d4204SJose E. Roman   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1793d24d4204SJose E. Roman   ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1794d24d4204SJose E. Roman   if (flg) {
1795d24d4204SJose E. Roman     a->mb = array[0];
1796d24d4204SJose E. Roman     a->nb = (k>1)? array[1]: a->mb;
1797d24d4204SJose E. Roman   }
1798d24d4204SJose E. Roman   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1799d24d4204SJose E. Roman 
1800d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1801d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1802d24d4204SJose E. Roman   ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1803d24d4204SJose E. Roman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1804d24d4204SJose E. Roman   PetscFunctionReturn(0);
1805d24d4204SJose E. Roman }
1806d24d4204SJose E. Roman 
1807d24d4204SJose E. Roman /*@C
1808d24d4204SJose E. Roman    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1809d24d4204SJose E. Roman    (2D block cyclic distribution).
1810d24d4204SJose E. Roman 
1811d24d4204SJose E. Roman    Collective
1812d24d4204SJose E. Roman 
1813d24d4204SJose E. Roman    Input Parameters:
1814d24d4204SJose E. Roman +  comm - MPI communicator
1815d24d4204SJose E. Roman .  mb   - row block size (or PETSC_DECIDE to have it set)
1816d24d4204SJose E. Roman .  nb   - column block size (or PETSC_DECIDE to have it set)
1817d24d4204SJose E. Roman .  M    - number of global rows
1818d24d4204SJose E. Roman .  N    - number of global columns
1819d24d4204SJose E. Roman .  rsrc - coordinate of process that owns the first row of the distributed matrix
1820d24d4204SJose E. Roman -  csrc - coordinate of process that owns the first column of the distributed matrix
1821d24d4204SJose E. Roman 
1822d24d4204SJose E. Roman    Output Parameter:
1823d24d4204SJose E. Roman .  A - the matrix
1824d24d4204SJose E. Roman 
1825d24d4204SJose E. Roman    Options Database Keys:
1826d24d4204SJose E. Roman .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1827d24d4204SJose E. Roman 
1828d24d4204SJose E. Roman    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1829d24d4204SJose E. Roman    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1830d24d4204SJose E. Roman    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1831d24d4204SJose E. Roman 
1832d24d4204SJose E. Roman    Notes:
1833d24d4204SJose E. Roman    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1834d24d4204SJose E. Roman    is chosen.
1835d24d4204SJose E. Roman 
1836d24d4204SJose E. Roman    Storage Information:
1837d24d4204SJose E. Roman    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1838d24d4204SJose E. Roman    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1839d24d4204SJose E. Roman    significance and are thus ignored. The block sizes refer to the values
1840d24d4204SJose E. Roman    used for the distributed matrix, not the same meaning as in BAIJ.
1841d24d4204SJose E. Roman 
1842d24d4204SJose E. Roman    Level: intermediate
1843d24d4204SJose E. Roman 
1844d24d4204SJose E. Roman .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1845d24d4204SJose E. Roman @*/
1846d24d4204SJose E. Roman PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1847d24d4204SJose E. Roman {
1848d24d4204SJose E. Roman   PetscErrorCode ierr;
1849d24d4204SJose E. Roman   Mat_ScaLAPACK  *a;
1850d24d4204SJose E. Roman   PetscInt       m,n;
1851d24d4204SJose E. Roman 
1852d24d4204SJose E. Roman   PetscFunctionBegin;
1853d24d4204SJose E. Roman   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1854d24d4204SJose E. Roman   ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
1855d24d4204SJose E. Roman   if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1856d24d4204SJose E. Roman   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1857d24d4204SJose E. Roman   m = PETSC_DECIDE;
1858d24d4204SJose E. Roman   ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
1859d24d4204SJose E. Roman   n = PETSC_DECIDE;
1860d24d4204SJose E. Roman   ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
1861d24d4204SJose E. Roman   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1862d24d4204SJose E. Roman   a = (Mat_ScaLAPACK*)(*A)->data;
1863d24d4204SJose E. Roman   ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
1864d24d4204SJose E. Roman   ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
1865d24d4204SJose E. Roman   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1866d24d4204SJose E. Roman   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1867d24d4204SJose E. Roman   ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
1868d24d4204SJose E. Roman   ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
1869d24d4204SJose E. Roman   ierr = MatSetUp(*A);CHKERRQ(ierr);
1870d24d4204SJose E. Roman   PetscFunctionReturn(0);
1871d24d4204SJose E. Roman }
1872d24d4204SJose E. Roman 
1873