1 /*$Id: mmaij.c,v 1.59 2001/08/07 03:02:49 balay Exp $*/ 2 3 /* 4 Support for the parallel AIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 10 int MatSetUpMultiply_MPIAIJ(Mat mat) 11 { 12 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 13 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 14 int N = mat->N,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 15 IS from,to; 16 Vec gvec; 17 #if defined (PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 int gid,lid; 21 #endif 22 23 PetscFunctionBegin; 24 25 #if defined (PETSC_USE_CTABLE) 26 /* use a table - Mark Adams (this has not been tested with "shift") */ 27 ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr); 28 for (i=0; i<aij->B->m; i++) { 29 for (j=0; j<B->ilen[i]; j++) { 30 int data,gid1 = aj[B->i[i] + j] + 1; 31 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 32 if (!data) { 33 /* one based table */ 34 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 35 } 36 } 37 } 38 /* form array of columns we need */ 39 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 40 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 41 while (tpos) { 42 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 43 gid--; 44 lid--; 45 garray[lid] = gid; 46 } 47 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 48 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 49 for (i=0; i<ec; i++) { 50 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 51 } 52 /* compact out the extra columns in B */ 53 for (i=0; i<aij->B->m; i++) { 54 for (j=0; j<B->ilen[i]; j++) { 55 int gid1 = aj[B->i[i] + j] + 1; 56 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 57 lid --; 58 aj[B->i[i] + j] = lid; 59 } 60 } 61 aij->B->n = aij->B->N = ec; 62 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 63 /* Mark Adams */ 64 #else 65 /* For the first stab we make an array as long as the number of columns */ 66 /* mark those columns that are in aij->B */ 67 ierr = PetscMalloc((N+1)*sizeof(int),&indices);CHKERRQ(ierr); 68 ierr = PetscMemzero(indices,N*sizeof(int));CHKERRQ(ierr); 69 for (i=0; i<aij->B->m; i++) { 70 for (j=0; j<B->ilen[i]; j++) { 71 if (!indices[aj[B->i[i] + j] ]) ec++; 72 indices[aj[B->i[i] + j] ] = 1; 73 } 74 } 75 76 /* form array of columns we need */ 77 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 78 ec = 0; 79 for (i=0; i<N; i++) { 80 if (indices[i]) garray[ec++] = i; 81 } 82 83 /* make indices now point into garray */ 84 for (i=0; i<ec; i++) { 85 indices[garray[i]] = i; 86 } 87 88 /* compact out the extra columns in B */ 89 for (i=0; i<aij->B->m; i++) { 90 for (j=0; j<B->ilen[i]; j++) { 91 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 92 } 93 } 94 aij->B->n = aij->B->N = ec; 95 ierr = PetscFree(indices);CHKERRQ(ierr); 96 #endif 97 /* create local vector that is used to scatter into */ 98 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 99 100 /* create two temporary Index sets for build scatter gather */ 101 ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr); 102 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 103 104 /* create temporary global vector to generate scatter context */ 105 /* this is inefficient, but otherwise we must do either 106 1) save garray until the first actual scatter when the vector is known or 107 2) have another way of generating a scatter context without a vector.*/ 108 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 109 110 /* generate the scatter context */ 111 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 112 PetscLogObjectParent(mat,aij->Mvctx); 113 PetscLogObjectParent(mat,aij->lvec); 114 PetscLogObjectParent(mat,from); 115 PetscLogObjectParent(mat,to); 116 aij->garray = garray; 117 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 118 ierr = ISDestroy(from);CHKERRQ(ierr); 119 ierr = ISDestroy(to);CHKERRQ(ierr); 120 ierr = VecDestroy(gvec);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "DisAssemble_MPIAIJ" 127 /* 128 Takes the local part of an already assembled MPIAIJ matrix 129 and disassembles it. This is to allow new nonzeros into the matrix 130 that require more communication in the matrix vector multiply. 131 Thus certain data-structures must be rebuilt. 132 133 Kind of slow! But that's what application programmers get when 134 they are sloppy. 135 */ 136 int DisAssemble_MPIAIJ(Mat A) 137 { 138 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 139 Mat B = aij->B,Bnew; 140 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 141 int ierr,i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray; 142 int *nz,ec; 143 PetscScalar v; 144 145 PetscFunctionBegin; 146 /* free stuff related to matrix-vec multiply */ 147 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 148 ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0; 149 ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0; 150 if (aij->colmap) { 151 #if defined (PETSC_USE_CTABLE) 152 ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr); 153 aij->colmap = 0; 154 #else 155 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 156 aij->colmap = 0; 157 PetscLogObjectMemory(A,-aij->B->n*sizeof(int)); 158 #endif 159 } 160 161 /* make sure that B is assembled so we can access its values */ 162 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 163 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 164 165 /* invent new B and copy stuff over */ 166 ierr = PetscMalloc((m+1)*sizeof(int),&nz);CHKERRQ(ierr); 167 for (i=0; i<m; i++) { 168 nz[i] = Baij->i[i+1] - Baij->i[i]; 169 } 170 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,0,nz,&Bnew);CHKERRQ(ierr); 171 ierr = PetscFree(nz);CHKERRQ(ierr); 172 for (i=0; i<m; i++) { 173 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 174 col = garray[Baij->j[ct]]; 175 v = Baij->a[ct++]; 176 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 177 } 178 } 179 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 180 aij->garray = 0; 181 PetscLogObjectMemory(A,-ec*sizeof(int)); 182 ierr = MatDestroy(B);CHKERRQ(ierr); 183 PetscLogObjectParent(A,Bnew); 184 aij->B = Bnew; 185 A->was_assembled = PETSC_FALSE; 186 PetscFunctionReturn(0); 187 } 188 189 /* ugly stuff added for Glenn someday we should fix this up */ 190 191 static int *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 192 parts of the local matrix */ 193 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 194 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 198 int MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 199 { 200 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 201 int ierr,i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 202 int *r_rmapd,*r_rmapo; 203 204 PetscFunctionBegin; 205 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 206 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 207 ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr); 208 ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 209 nt = 0; 210 for (i=0; i<inA->mapping->n; i++) { 211 if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) { 212 nt++; 213 r_rmapd[i] = inA->mapping->indices[i] + 1; 214 } 215 } 216 if (nt != n) SETERRQ2(1,"Hmm nt %d n %d",nt,n); 217 ierr = PetscMalloc((n+1)*sizeof(int),&auglyrmapd);CHKERRQ(ierr); 218 for (i=0; i<inA->mapping->n; i++) { 219 if (r_rmapd[i]){ 220 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 221 } 222 } 223 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 224 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 225 226 ierr = PetscMalloc((inA->N+1)*sizeof(int),&lindices);CHKERRQ(ierr); 227 ierr = PetscMemzero(lindices,inA->N*sizeof(int));CHKERRQ(ierr); 228 for (i=0; i<ina->B->n; i++) { 229 lindices[garray[i]] = i+1; 230 } 231 no = inA->mapping->n - nt; 232 ierr = PetscMalloc((inA->mapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr); 233 ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(int));CHKERRQ(ierr); 234 nt = 0; 235 for (i=0; i<inA->mapping->n; i++) { 236 if (lindices[inA->mapping->indices[i]]) { 237 nt++; 238 r_rmapo[i] = lindices[inA->mapping->indices[i]]; 239 } 240 } 241 if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n); 242 ierr = PetscFree(lindices);CHKERRQ(ierr); 243 ierr = PetscMalloc((nt+1)*sizeof(int),&auglyrmapo);CHKERRQ(ierr); 244 for (i=0; i<inA->mapping->n; i++) { 245 if (r_rmapo[i]){ 246 auglyrmapo[(r_rmapo[i]-1)] = i; 247 } 248 } 249 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 250 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 251 252 PetscFunctionReturn(0); 253 } 254 255 #undef __FUNCT__ 256 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 257 int MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 258 { 259 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 260 int ierr,(*f)(Mat,Vec); 261 262 PetscFunctionBegin; 263 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 264 if (f) { 265 ierr = (*f)(A,scale);CHKERRQ(ierr); 266 } 267 PetscFunctionReturn(0); 268 } 269 270 EXTERN_C_BEGIN 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ" 273 int MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 274 { 275 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 276 int ierr,n,i; 277 PetscScalar *d,*o,*s; 278 279 PetscFunctionBegin; 280 if (!auglyrmapd) { 281 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 282 } 283 284 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 285 286 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 287 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 288 for (i=0; i<n; i++) { 289 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 290 } 291 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 292 /* column scale "diagonal" portion of local matrix */ 293 ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 294 295 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 296 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 297 for (i=0; i<n; i++) { 298 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 299 } 300 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 301 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 302 /* column scale "off-diagonal" portion of local matrix */ 303 ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 304 305 PetscFunctionReturn(0); 306 } 307 EXTERN_C_END 308 309 310