/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc-private/isimpl.h>    /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one-based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild the table with sorted local ids */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;

  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;

  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
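
  /*
     At this point (in either branch) ec is the number of distinct columns
     referenced by the off-diagonal block B on this process, garray[0..ec-1]
     lists their global indices in ascending order, and B's column indices
     have been compacted to local numbering.  A small worked example with
     hypothetical data: if B references global columns {3,9,7}, then ec = 3,
     garray = {3,7,9}, and the entries 3, 9, 7 in B->j become 0, 2, 1.
  */
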
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building the scatter */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this does not allocate the array's memory so it is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  aij->garray = garray;

  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
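
/*
   For reference, a sketch of how the objects built above are consumed during a
   matrix-vector product (this mirrors the pattern in MatMult_MPIAIJ; it is not
   part of this file).  The scatter fills lvec with the needed off-process
   entries of x while the diagonal block is applied, overlapping communication
   with computation:

     ierr = VecScatterBegin(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*aij->A->ops->mult)(aij->A,xx,yy);CHKERRQ(ierr);                  y  = A_diag * x_local
     ierr = VecScatterEnd(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*aij->B->ops->multadd)(aij->B,aij->lvec,yy,yy);CHKERRQ(ierr);     y += A_off * lvec
*/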

#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
   Takes the local part of an already assembled MPIAIJ matrix and disassembles
   it. This is done to allow new nonzeros into the matrix that require more
   communication in the matrix-vector multiply; certain data structures must
   therefore be rebuilt.

   Kind of slow! But that's what application programmers get when they are
   sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* inherit insertion error options */

  ierr = PetscFree(nz);CHKERRQ(ierr);
  /* copy the values over, mapping B's compacted column indices back to global numbering */
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd     = 0,auglyoo     = 0; /* work vectors used to scale the two parts of the local matrix */

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
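
/*
   A small worked example of the maps built above (hypothetical data): suppose
   this process owns global indices [cstart,cend) = [4,8) and its row mapping
   lists the global indices {5,4,9,6,7,2}.  The owned ("diagonal") entries are
   5,4,6,7, giving auglyrmapd = {1,0,3,4}: position k holds the local position
   of owned global index cstart+k.  The remaining entries 9 and 2 are
   "off-diagonal"; with garray = {2,9} this gives auglyrmapo = {5,2}: position
   k holds the local position of global index garray[k].
*/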

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
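
/*
   A minimal usage sketch (hypothetical driver code, not part of this file):
   the local scaling path above is reached through the public
   MatDiagonalScaleLocal() interface, which requires that a local-to-global
   mapping has been attached to the matrix:

     Mat                    A;       assembled MPIAIJ matrix
     Vec                    scale;   sequential vector in the local ordering
     ISLocalToGlobalMapping map;
     ierr = MatSetLocalToGlobalMapping(A,map,map);CHKERRQ(ierr);
     ierr = MatDiagonalScaleLocal(A,scale);CHKERRQ(ierr);
*/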