/*
   Support for the parallel AIJ matrix vector multiply
*/
#include "src/mat/impls/aij/mpi/mpiaij.h"

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->n = aij->B->N = ec;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->n = aij->B->N = ec;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter-gather */
  ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This is inefficient, but otherwise we would have to either
       1) save garray until the first actual scatter when the vector is known or
       2) have another way of generating a scatter context without a vector. */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,aij->Mvctx);
  PetscLogObjectParent(mat,aij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  aij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
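
/*
   How the objects built above are used: aij->A holds the "diagonal" block
   (the columns owned by this process) and aij->B the "off-diagonal" block,
   with its column indices compacted and renumbered through garray. The
   parallel product y = A_diag x_local + A_offdiag x_ghost can then overlap
   communication with computation, roughly as follows. This is a sketch only
   (xx and yy stand for the global input and output vectors; the actual
   implementation is MatMult_MPIAIJ() in mpiaij.c):

     ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
     ierr = (*aij->A->ops->mult)(aij->A,xx,yy);CHKERRQ(ierr);
     ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
     ierr = (*aij->B->ops->multadd)(aij->B,aij->lvec,yy,yy);CHKERRQ(ierr);
*/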

#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIAIJ"
/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

     Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode DisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0;
  ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0;
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
    PetscLogObjectMemory(A,-aij->B->n*sizeof(PetscInt));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  aij->garray = 0;
  PetscLogObjectMemory(A,-ec*sizeof(PetscInt));
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscLogObjectParent(A,Bnew);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
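
/*
   A sketch of what triggers the disassembly above (hedged; the call site is
   the MatSetValues implementation for MPIAIJ): once the matrix has been
   assembled, B only knows the compacted column numbering, so inserting into
   an off-process column j that is not already present in garray cannot be
   expressed there:

     ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   In that case B is disassembled back to global column indices as above, the
   value is inserted, and assembly rebuilds the multiply data structures via
   MatSetUpMultiply_MPIAIJ().
*/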

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                                    parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;         /* work vectors used to scale the two parts of the local matrix */

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*)inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->mapping->n - nt;
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (lindices[inA->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[r_rmapo[i]-1] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned, since it duplicates MatDiagonalScaleLocal() */
  PetscErrorCode ierr,(*f)(Mat,Vec);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scale);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
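
/*
   The query above succeeds because the MPIAIJ constructor composes the
   implementation below onto the object, along these lines (a sketch; the
   actual registration lives with MatCreate_MPIAIJ() in mpiaij.c):

     ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatDiagonalScaleLocal_C",
                                              "MatDiagonalScaleLocal_MPIAIJ",
                                              MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);

   Matrix types that never compose "MatDiagonalScaleLocal_C" simply fall
   through the if (f) test as a no-op.
*/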

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
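
/*
   Typical usage (a sketch; it assumes A has a local-to-global mapping set
   with MatSetLocalToGlobalMapping(), and the names scale and nlocal are
   placeholders): the caller fills a sequential vector indexed by the local
   (mapped) ordering, and both halves of the local matrix are column scaled
   in one call:

     ierr = VecCreateSeq(PETSC_COMM_SELF,nlocal,&scale);CHKERRQ(ierr);
     ...fill scale...
     ierr = MatMPIAIJDiagonalScaleLocal(A,scale);CHKERRQ(ierr);

   Note that the setup routine caches auglyrmapd/auglyrmapo and the work
   vectors in file-static variables, so the first call fixes the mapping
   (and the matrix it belongs to) for the rest of the run.
*/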