#define PETSCMAT_DLL

/*
   Support for the parallel AIJ matrix vector multiply
*/
#include "../src/mat/impls/aij/mpi/mpiaij.h"

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(aij->B->rmap->n,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
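  /*
     Illustrative note (not part of the original source): at this point garray
     maps the compacted local column numbering of B back to global column
     numbers, and B->j has been rewritten in the compact numbering.  For
     example, if the rows of B reference only global columns 3, 7, and 9,
     then ec = 3, garray = {3,7,9}, and occurrences of 3, 7, 9 in B->j have
     become 0, 1, 2.
  */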
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building the scatter-gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_FALSE;
  if (mat->rmap->bs > 1) {
    PetscInt bs = mat->rmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE; /* tentatively; the scan below may veto it */
      for (i=0; i<ec/bs; i++) {
        /* the first index of each block must itself be block aligned ... */
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        /* ... and the remaining indices of the block must be contiguous */
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
  if (useblockis) {
    PetscInt *ga,bs = mat->rmap->bs,iec = ec/bs;
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory, so it is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
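/*
   Illustrative sketch (not part of the original source): the scatter context
   and local vector built above are what the parallel multiply consumes.
   MatMult_MPIAIJ (in mpiaij.c) uses them roughly as

       VecScatterBegin(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
       (*aij->A->ops->mult)(aij->A,xx,yy);                <- diagonal block
       VecScatterEnd(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
       (*aij->B->ops->multadd)(aij->B,aij->lvec,yy,yy);   <- off-diagonal block

   so the communication of the needed remote entries of xx is overlapped with
   the multiply by the local diagonal block.
*/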

#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIAIJ"
/*
   Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode DisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0;
  ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0;
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  aij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
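/*
   Illustrative note (not part of the original source): DisAssemble_MPIAIJ undoes
   the column compaction performed in MatSetUpMultiply_MPIAIJ.  Continuing the
   earlier example with garray = {3,7,9}: an entry stored in B at compact column 1
   is reinserted into Bnew at global column 7, and Bnew is created with the full
   global column width A->cmap->N, so that later MatSetValues() calls may
   reference any global column.
*/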

/* ugly stuff added for Glenn someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal"
                                                    and "off-diagonal" parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;         /* work vectors used to scale the two parts of the local matrix */

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*)inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->mapping->n - nt;
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (lindices[inA->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[r_rmapo[i]-1] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
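/*
   Illustrative usage sketch (an assumption, not from the original source): a
   caller that has set a local-to-global mapping on the matrix (so inA->mapping
   is defined) and holds a scaling vector ordered by that mapping might do

       Vec lscale;                                   (hypothetical local vector)
       VecCreateSeq(PETSC_COMM_SELF,nloc,&lscale);
       ... fill lscale in the local ordering ...
       MatMPIAIJDiagonalScaleLocal(A,lscale);

   Note that auglyrmapd/auglyrmapo/auglydd/auglyoo above are file-static, hence
   shared by every matrix on a process and set up only on the first call; that
   is part of what the "ugly stuff" comment refers to.
*/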