#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type
   also automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
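/*
   A minimal usage sketch for the MATAIJ documentation above (illustration only,
   not part of this file's implementation; the sizes and nonzero estimates are
   assumptions). Calling both preallocation routines, as recommended, makes the
   same code work for communicators of any size:

.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        // used on one process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); // used on several processes
.ve
*/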
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
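/*
   Sketch of how the routine above is typically reached through the public
   interface (illustrative; note keptrows is returned as NULL when every row
   has a nonzero):

.vb
     IS keptrows;
     ierr = MatFindNonzeroRows(M,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
.ve
*/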
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
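/*
   Usage sketch for the column-norm routine above (illustrative): the result is
   reduced over all processes, so the caller provides one entry per global
   column.

.vb
     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
.ve
*/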
#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
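/*
   Illustrative call sequence for MatDistribute_MPIAIJ() under the assumptions
   stated in its comment (gmat is a square MATSEQAIJ significant on rank 0, and
   m is the desired number of local rows):

.vb
     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     // later, move updated numerical values from rank 0 without recreating dmat
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
.ve
*/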
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
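/*
   For reference, the colmap built above is consulted elsewhere in this file
   with the following pattern (sketch; gcol is a hypothetical global column
   index and lcol the local result, negative when the column is not present):

.vb
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                      // keys and values are stored shifted by one
   #else
     lcol = aij->colmap[gcol] - 1; // a zero entry means "not present"
   #endif
.ve
*/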
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }
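/*
   The two macros above share the same search strategy; a plain-function sketch
   of just that search (illustrative only, without the reallocation and
   shifting that the macros also perform):

.vb
     static PetscInt FindInsertPoint(const PetscInt *rp,PetscInt low,PetscInt high,PetscInt col)
     {
       PetscInt t,i;
       while (high - low > 5) {   // binary search until the window is small
         t = (low + high)/2;
         if (rp[t] > col) high = t;
         else             low  = t;
       }
       for (i=low; i<high; i++) { // then a short linear scan
         if (rp[i] >= col) break;
       }
       return i;                  // rp[i] == col when the entry already exists
     }
.ve
*/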
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
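/*
   Usage sketch: values destined for rows owned by another process are stashed
   by the routine above and only communicated during assembly, so a typical
   caller (illustrative indices and value) looks like

.vb
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
*/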
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
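/*
   Usage sketch for the row-zeroing routine above (illustrative row list): rows
   may be specified by any process unless MAT_NO_OFF_PROC_ZERO_ROWS is set, and
   passing x and b also fixes the right-hand side of the eliminated rows:

.vb
     PetscInt rows[] = {0,5,7};   // assumed global row indices
     ierr = MatZeroRows(A,3,rows,1.0,x,b);CHKERRQ(ierr);
.ve
*/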
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
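/*
   Usage sketch for the transpose test above (illustrative tolerance):

.vb
     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
     // flg is PETSC_TRUE when B equals the transpose of A to the given tolerance
.ve
*/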
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the root process needs space for the largest number of nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
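/*
   The binary writer above is reached through MatView() with a binary viewer;
   MatLoad() reads the same format back. A minimal sketch (assumed file name):

.vb
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
.ve
*/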
1436 Everyone has to call to draw the matrix since the graphics waits are 1437 synchronized across all processors that share the PetscDraw object 1438 */ 1439 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1440 if (!rank) { 1441 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1442 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1443 } 1444 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1445 ierr = MatDestroy(&A);CHKERRQ(ierr); 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatView_MPIAIJ" 1452 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1453 { 1454 PetscErrorCode ierr; 1455 PetscBool iascii,isdraw,issocket,isbinary; 1456 1457 PetscFunctionBegin; 1458 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1459 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1462 if (iascii || isdraw || isbinary || issocket) { 1463 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatSOR_MPIAIJ" 1470 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1471 { 1472 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1473 PetscErrorCode ierr; 1474 Vec bb1 = 0; 1475 PetscBool hasop; 1476 1477 PetscFunctionBegin; 1478 if (flag == SOR_APPLY_UPPER) { 1479 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1484 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1485 } 1486 1487 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1488 if (flag & SOR_ZERO_INITIAL_GUESS) { 1489 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1490 its--; 1491 } 1492 1493 while (its--) { 1494 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 1497 /* update rhs: bb1 = bb - B*x */ 1498 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1499 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1500 1501 /* local sweep */ 1502 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1503 } 1504 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1505 if (flag & SOR_ZERO_INITIAL_GUESS) { 1506 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1507 its--; 1508 } 1509 while (its--) { 1510 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 1513 /* update rhs: bb1 = bb - B*x */ 1514 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1515 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1516 1517 /* local sweep */ 1518 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1519 } 1520 } else if (flag & 
SOR_LOCAL_BACKWARD_SWEEP) { 1521 if (flag & SOR_ZERO_INITIAL_GUESS) { 1522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1523 its--; 1524 } 1525 while (its--) { 1526 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1527 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 1529 /* update rhs: bb1 = bb - B*x */ 1530 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1531 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1532 1533 /* local sweep */ 1534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1535 } 1536 } else if (flag & SOR_EISENSTAT) { 1537 Vec xx1; 1538 1539 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1540 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1541 1542 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1543 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 if (!mat->diag) { 1545 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1546 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1547 } 1548 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1549 if (hasop) { 1550 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1551 } else { 1552 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1553 } 1554 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1555 1556 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1560 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1561 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1562 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1563 1564 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatPermute_MPIAIJ" 1570 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1571 { 1572 Mat aA,aB,Aperm; 1573 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1574 PetscScalar *aa,*ba; 1575 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1576 PetscSF rowsf,sf; 1577 IS parcolp = NULL; 1578 PetscBool done; 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1583 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1584 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1585 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1586 1587 /* Invert row permutation to find out where my rows should go */ 1588 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1589 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1590 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1591 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1592 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1594 1595 /* Invert column permutation to find out where my columns should go */ 1596 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1597 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1598 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1599 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1600 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1601 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1602 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1603 1604 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1605 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1606 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1607 1608 /* Find out where my gcols should go */ 1609 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1610 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1611 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1612 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1613 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1614 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1616 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1617 1618 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1619 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1620 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1621 for (i=0; i<m; i++) { 1622 PetscInt row = rdest[i],rowner; 1623 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1624 for (j=ai[i]; j<ai[i+1]; j++) { 1625 PetscInt cowner,col = cdest[aj[j]]; 1626 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1627 if (rowner == cowner) dnnz[i]++; 1628 else onnz[i]++; 1629 } 1630 for (j=bi[i]; j<bi[i+1]; j++) { 1631 PetscInt cowner,col = gcdest[bj[j]]; 1632 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1633 if (rowner == cowner) dnnz[i]++; 1634 else onnz[i]++; 1635 } 1636 } 1637 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1638 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1639 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1640 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1641 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1642 1643 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1644 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1645 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1646 for (i=0; i<m; i++) { 1647 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1648 PetscInt j0,rowlen; 1649 rowlen = ai[i+1] - ai[i]; 1650 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1651 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1652 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 rowlen = bi[i+1] - bi[i]; 1655 for (j0=j=0; j<rowlen; j0=j) { 1656 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1657 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1658 } 1659 } 1660 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1661 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1662 ierr = 
MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1663 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1664 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1665 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1666 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1667 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1668 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1669 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1670 *B = Aperm; 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1676 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1677 { 1678 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1679 PetscErrorCode ierr; 1680 1681 PetscFunctionBegin; 1682 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1683 if (ghosts) *ghosts = aij->garray; 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1689 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1690 { 1691 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1692 Mat A = mat->A,B = mat->B; 1693 PetscErrorCode ierr; 1694 PetscReal isend[5],irecv[5]; 1695 1696 PetscFunctionBegin; 1697 info->block_size = 1.0; 1698 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1699 1700 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1701 isend[3] = info->memory; isend[4] = info->mallocs; 1702 1703 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1704 1705 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1706 isend[3] += info->memory; isend[4] += info->mallocs; 1707 if (flag == MAT_LOCAL) { 1708 info->nz_used = isend[0]; 1709 info->nz_allocated = isend[1]; 1710 info->nz_unneeded = isend[2]; 1711 info->memory = isend[3]; 1712 info->mallocs = isend[4]; 1713 } else if (flag == MAT_GLOBAL_MAX) { 1714 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1715 1716 info->nz_used = irecv[0]; 1717 info->nz_allocated = irecv[1]; 1718 info->nz_unneeded = irecv[2]; 1719 info->memory = irecv[3]; 1720 info->mallocs = irecv[4]; 1721 } else if (flag == MAT_GLOBAL_SUM) { 1722 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1723 1724 info->nz_used = irecv[0]; 1725 info->nz_allocated = irecv[1]; 1726 info->nz_unneeded = irecv[2]; 1727 info->memory = irecv[3]; 1728 info->mallocs = irecv[4]; 1729 } 1730 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1731 info->fill_ratio_needed = 0; 1732 info->factor_mallocs = 0; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatSetOption_MPIAIJ" 1738 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1739 { 1740 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1741 PetscErrorCode ierr; 1742 1743 PetscFunctionBegin; 1744 switch (op) { 1745 case MAT_NEW_NONZERO_LOCATIONS: 1746 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1747 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1748 case MAT_KEEP_NONZERO_PATTERN: 1749 case MAT_NEW_NONZERO_LOCATION_ERR: 1750 case MAT_USE_INODES: 1751 case MAT_IGNORE_ZERO_ENTRIES: 1752 MatCheckPreallocated(A,1); 1753 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1754 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1755 break; 1756 case MAT_ROW_ORIENTED: 1757 a->roworiented = flg; 1758 1759 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1760 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_NEW_DIAGONALS: 1763 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1764 break; 1765 case MAT_IGNORE_OFF_PROC_ENTRIES: 1766 a->donotstash = flg; 1767 break; 1768 case MAT_SPD: 1769 A->spd_set = PETSC_TRUE; 1770 A->spd = flg; 1771 if (flg) { 1772 A->symmetric = PETSC_TRUE; 1773 A->structurally_symmetric = PETSC_TRUE; 1774 A->symmetric_set = PETSC_TRUE; 1775 A->structurally_symmetric_set = PETSC_TRUE; 1776 } 1777 break; 1778 case MAT_SYMMETRIC: 1779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1780 break; 1781 case MAT_STRUCTURALLY_SYMMETRIC: 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 case MAT_HERMITIAN: 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_SYMMETRY_ETERNAL: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 break; 1790 default: 1791 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1792 } 1793 PetscFunctionReturn(0); 1794 } 1795 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatGetRow_MPIAIJ" 1798 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1799 { 1800 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1801 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1802 PetscErrorCode ierr; 1803 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1804 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1805 PetscInt *cmap,*idx_p; 1806 1807 PetscFunctionBegin; 1808 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1809 mat->getrowactive = PETSC_TRUE; 1810 1811 if (!mat->rowvalues && (idx || v)) { 1812 /* 1813 allocate enough space to hold information from the longest row. 
1814 */ 1815 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1816 PetscInt max = 1,tmp; 1817 for (i=0; i<matin->rmap->n; i++) { 1818 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1819 if (max < tmp) max = tmp; 1820 } 1821 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1822 } 1823 1824 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1825 lrow = row - rstart; 1826 1827 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1828 if (!v) {pvA = 0; pvB = 0;} 1829 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1830 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1831 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1832 nztot = nzA + nzB; 1833 1834 cmap = mat->garray; 1835 if (v || idx) { 1836 if (nztot) { 1837 /* Sort by increasing column numbers, assuming A and B already sorted */ 1838 PetscInt imark = -1; 1839 if (v) { 1840 *v = v_p = mat->rowvalues; 1841 for (i=0; i<nzB; i++) { 1842 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1843 else break; 1844 } 1845 imark = i; 1846 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1847 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1848 } 1849 if (idx) { 1850 *idx = idx_p = mat->rowindices; 1851 if (imark > -1) { 1852 for (i=0; i<imark; i++) { 1853 idx_p[i] = cmap[cworkB[i]]; 1854 } 1855 } else { 1856 for (i=0; i<nzB; i++) { 1857 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1858 else break; 1859 } 1860 imark = i; 1861 } 1862 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1863 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1864 } 1865 } else { 1866 if (idx) *idx = 0; 1867 if (v) *v = 0; 1868 } 1869 } 1870 *nz = nztot; 1871 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1872 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1878 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1879 { 1880 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1881 1882 PetscFunctionBegin; 1883 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1884 aij->getrowactive = PETSC_FALSE; 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatNorm_MPIAIJ" 1890 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1891 { 1892 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1893 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1894 PetscErrorCode ierr; 1895 PetscInt i,j,cstart = mat->cmap->rstart; 1896 PetscReal sum = 0.0; 1897 MatScalar *v; 1898 1899 PetscFunctionBegin; 1900 if (aij->size == 1) { 1901 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1902 } else { 1903 if (type == NORM_FROBENIUS) { 1904 v = amat->a; 1905 for (i=0; i<amat->nz; i++) { 1906 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1907 } 1908 v = bmat->a; 1909 for (i=0; i<bmat->nz; i++) { 1910 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1911 } 1912 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1913 *norm = PetscSqrtReal(*norm); 1914 } else if (type == NORM_1) { /* max column norm */ 1915 PetscReal *tmp,*tmp2; 1916 PetscInt *jj,*garray = aij->garray; 1917 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1918 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1919 *norm = 0.0; 1920 v = amat->a; jj = amat->j; 1921 for (j=0; j<amat->nz; j++) { 1922 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1923 } 1924 v = bmat->a; jj = bmat->j; 1925 for (j=0; j<bmat->nz; j++) { 1926 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1927 } 1928 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1929 for (j=0; j<mat->cmap->N; j++) { 1930 if (tmp2[j] > *norm) *norm = tmp2[j]; 1931 } 1932 ierr = PetscFree(tmp);CHKERRQ(ierr); 1933 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1934 } else if (type == NORM_INFINITY) { /* max row norm */ 1935 PetscReal ntemp = 0.0; 1936 for (j=0; j<aij->A->rmap->n; j++) { 1937 v = amat->a + amat->i[j]; 1938 sum = 0.0; 1939 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1940 sum += PetscAbsScalar(*v); v++; 1941 } 1942 v = bmat->a + bmat->i[j]; 1943 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1944 sum += PetscAbsScalar(*v); v++; 1945 } 1946 if (sum > ntemp) ntemp = sum; 1947 } 1948 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1949 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1950 } 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatTranspose_MPIAIJ" 1956 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1957 { 1958 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1959 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1960 PetscErrorCode ierr; 1961 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1962 PetscInt cstart = A->cmap->rstart,ncol; 1963 Mat B; 1964 MatScalar *array; 1965 1966 PetscFunctionBegin; 1967 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1968 1969 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1970 ai = Aloc->i; aj = Aloc->j; 1971 bi = Bloc->i; bj = Bloc->j; 1972 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1973 PetscInt *d_nnz,*g_nnz,*o_nnz; 1974 PetscSFNode *oloc; 1975 PETSC_UNUSED PetscSF sf; 1976 1977 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1978 /* compute d_nnz for preallocation */ 1979 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1980 for (i=0; i<ai[ma]; i++) { 1981 d_nnz[aj[i]]++; 1982 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1983 } 1984 /* compute local off-diagonal contributions */ 1985 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1986 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1987 /* map those to global */ 1988 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1989 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1990 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1991 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1992 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1993 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1994 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1995 1996 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1997 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1998 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1999 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2000 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2001 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2002 } else { 2003 B = *matout; 2004 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2005 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2006 } 2007 2008 /* copy over the A part */ 2009 array = Aloc->a; 2010 row = A->rmap->rstart; 2011 for (i=0; i<ma; i++) { 2012 ncol = ai[i+1]-ai[i]; 2013 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2014 row++; 2015 array += ncol; aj += ncol; 2016 } 2017 aj = Aloc->j; 2018 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2019 2020 /* copy over the B part */ 2021 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2022 array = Bloc->a; 2023 row = A->rmap->rstart; 2024 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2025 cols_tmp = cols; 2026 for (i=0; i<mb; i++) { 2027 ncol = bi[i+1]-bi[i]; 2028 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2029 row++; 2030 array += ncol; cols_tmp += ncol; 2031 } 2032 ierr = PetscFree(cols);CHKERRQ(ierr); 2033 2034 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2035 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2036 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2037 *matout = B; 2038 } else { 2039 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2040 } 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2046 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2047 { 2048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2049 Mat a = aij->A,b = aij->B; 2050 PetscErrorCode ierr; 2051 PetscInt s1,s2,s3; 2052 2053 PetscFunctionBegin; 2054 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2055 if (rr) { 2056 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2057 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2058 /* Overlap communication with computation. 
*/ 2059 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 } 2061 if (ll) { 2062 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2063 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2064 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2065 } 2066 /* scale the diagonal block */ 2067 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2068 2069 if (rr) { 2070 /* Do a scatter end and then right scale the off-diagonal block */ 2071 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2079 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2080 { 2081 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2082 PetscErrorCode ierr; 2083 2084 PetscFunctionBegin; 2085 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatEqual_MPIAIJ" 2091 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2092 { 2093 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2094 Mat a,b,c,d; 2095 PetscBool flg; 2096 PetscErrorCode ierr; 2097 2098 PetscFunctionBegin; 2099 a = matA->A; b = matA->B; 2100 c = matB->A; d = matB->B; 2101 2102 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2103 if (flg) { 2104 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2105 } 2106 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatCopy_MPIAIJ" 2112 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2113 { 2114 PetscErrorCode ierr; 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2117 2118 PetscFunctionBegin; 2119 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2120 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2121 /* because of the column compression in the off-processor part of the matrix a->B, 2122 the number of columns in a->B and b->B may be different, hence we cannot call 2123 the MatCopy() directly on the two parts. If need be, we can provide a more 2124 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2125 then copying the submatrices */ 2126 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2127 } else { 2128 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2129 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2130 } 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatSetUp_MPIAIJ" 2136 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2137 { 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /* 2146 Computes the number of nonzeros per row needed for preallocation when X and Y 2147 have different nonzero structure. 
2148 */ 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2151 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2152 { 2153 PetscInt i,j,k,nzx,nzy; 2154 2155 PetscFunctionBegin; 2156 /* Set the number of nonzeros in the new matrix */ 2157 for (i=0; i<m; i++) { 2158 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2159 nzx = xi[i+1] - xi[i]; 2160 nzy = yi[i+1] - yi[i]; 2161 nnz[i] = 0; 2162 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2163 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2164 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2165 nnz[i]++; 2166 } 2167 for (; k<nzy; k++) nnz[i]++; 2168 } 2169 PetscFunctionReturn(0); 2170 } 2171 2172 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2173 #undef __FUNCT__ 2174 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2175 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2176 { 2177 PetscErrorCode ierr; 2178 PetscInt m = Y->rmap->N; 2179 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2180 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2181 2182 PetscFunctionBegin; 2183 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatAXPY_MPIAIJ" 2189 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2190 { 2191 PetscErrorCode ierr; 2192 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2193 PetscBLASInt bnz,one=1; 2194 Mat_SeqAIJ *x,*y; 2195 2196 PetscFunctionBegin; 2197 if (str == SAME_NONZERO_PATTERN) { 2198 PetscScalar alpha = a; 2199 x = (Mat_SeqAIJ*)xx->A->data; 2200 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2201 y = (Mat_SeqAIJ*)yy->A->data; 2202 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2203 x = (Mat_SeqAIJ*)xx->B->data; 2204 y = (Mat_SeqAIJ*)yy->B->data; 2205 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2206 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2207 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2208 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2209 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2210 } else { 2211 Mat B; 2212 PetscInt *nnz_d,*nnz_o; 2213 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2214 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2215 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2216 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2217 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2218 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2219 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2220 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2221 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2222 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2223 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2224 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2225 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2226 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2227 } 2228 
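/*
   When the caller cannot guarantee any pattern relationship between X and Y
   (MatAXPY() with DIFFERENT_NONZERO_PATTERN), the branch above builds a new
   matrix B preallocated for the union of the two nonzero patterns, forms
   B = a*X + Y with MatAXPY_BasicWithPreallocation(), and then swaps B into Y
   with MatHeaderReplace(). A minimal calling sketch (the names Y, X, and
   alpha are illustrative only, not part of this file):

     Mat         X,Y;            assembled MATMPIAIJ matrices with identical layouts
     PetscScalar alpha = 2.0;
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the update instead reduces to the two BLASaxpy()
   calls on the stored values in the first branch of this routine.
*/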
PetscFunctionReturn(0);
2229 }
2230
2231 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2232
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatConjugate_MPIAIJ"
2235 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2236 {
2237 #if defined(PETSC_USE_COMPLEX)
2238 PetscErrorCode ierr;
2239 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2240
2241 PetscFunctionBegin;
2242 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2243 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2244 #else
2245 PetscFunctionBegin;
2246 #endif
2247 PetscFunctionReturn(0);
2248 }
2249
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatRealPart_MPIAIJ"
2252 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2253 {
2254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2255 PetscErrorCode ierr;
2256
2257 PetscFunctionBegin;
2258 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2259 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2260 PetscFunctionReturn(0);
2261 }
2262
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2265 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2266 {
2267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2268 PetscErrorCode ierr;
2269
2270 PetscFunctionBegin;
2271 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2272 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2273 PetscFunctionReturn(0);
2274 }
2275
2276 #if defined(PETSC_HAVE_PBGL)
2277
2278 #include <boost/parallel/mpi/bsp_process_group.hpp>
2279 #include <boost/graph/distributed/ilu_default_graph.hpp>
2280 #include <boost/graph/distributed/ilu_0_block.hpp>
2281 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2282 #include <boost/graph/distributed/petsc/interface.hpp>
2283 #include <boost/multi_array.hpp>
2284 #include <boost/parallel/distributed_property_map.hpp>
2285
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2288 /*
2289 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2290 */
2291 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2292 {
2293 namespace petsc = boost::distributed::petsc;
2294
2295 namespace graph_dist = boost::graph::distributed;
2296 using boost::graph::distributed::ilu_default::process_group_type;
2297 using boost::graph::ilu_permuted;
2298
2299 PetscBool row_identity, col_identity;
2300 PetscContainer c;
2301 PetscInt m, n, M, N;
2302 PetscErrorCode ierr;
2303
2304 PetscFunctionBegin;
2305 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2306 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2307 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2308 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2309
2310 process_group_type pg;
2311 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2312 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2313 lgraph_type& level_graph = *lgraph_p;
2314 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2315
2316 petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2317 ilu_permuted(level_graph);
2318
2319 /* put together the new matrix */
2320 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2321 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2322 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2323 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2324 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2325
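/*
   fact is assembled empty below: the actual factorization lives in the ILU
   level graph lgraph_p computed above, which is attached to fact through a
   PetscContainer composed under the key "graph". MatSolve_MPIAIJ() retrieves
   it again via PetscObjectQuery() and PetscContainerGetPointer().
*/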
ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2326 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2327 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2328
2329 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2330 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2331 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2332 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2333 PetscFunctionReturn(0);
2334 }
2335
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2338 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2339 {
2340 PetscFunctionBegin;
2341 PetscFunctionReturn(0);
2342 }
2343
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatSolve_MPIAIJ"
2346 /*
2347 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2348 */
2349 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2350 {
2351 namespace graph_dist = boost::graph::distributed;
2352
2353 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2354 lgraph_type *lgraph_p;
2355 PetscContainer c;
2356 PetscErrorCode ierr;
2357
2358 PetscFunctionBegin;
2359 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2360 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2361 ierr = VecCopy(b, x);CHKERRQ(ierr);
2362
2363 PetscScalar *array_x;
2364 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2365 PetscInt sx;
2366 ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2367
2368 PetscScalar *array_b;
2369 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2370 PetscInt sb;
2371 ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2372
2373 lgraph_type& level_graph = *lgraph_p;
2374 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2375
2376 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2377 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
2378 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);
2379
2380 typedef boost::iterator_property_map<array_ref_type::iterator,
2381 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
2382 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2383 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2384
2385 ilu_set_solve(*lgraph_p, vector_b, vector_x);
/* restore the vector arrays obtained with VecGetArray() above */
ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
2386 PetscFunctionReturn(0);
2387 }
2388 #endif
2389
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2392 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2393 {
2394 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2395 PetscErrorCode ierr;
2396 PetscInt i,*idxb = 0;
2397 PetscScalar *va,*vb;
2398 Vec vtmp;
2399
2400 PetscFunctionBegin;
2401 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2402 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2403 if (idx) {
2404 for (i=0; i<A->rmap->n; i++) {
2405 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2406 }
2407 }
2408
2409 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2410 if (idx) {
2411 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2412 }
2413 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2414 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2415
2416 for (i=0; i<A->rmap->n; i++) {
2417 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2418 va[i] = vb[i];
2419 if (idx) idx[i] = a->garray[idxb[i]];
2420 }
2421 }
2422
2423 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2424 ierr =
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2425 ierr = PetscFree(idxb);CHKERRQ(ierr);
2426 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2427 PetscFunctionReturn(0);
2428 }
2429
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2432 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2435 PetscErrorCode ierr;
2436 PetscInt i,*idxb = 0;
2437 PetscScalar *va,*vb;
2438 Vec vtmp;
2439
2440 PetscFunctionBegin;
2441 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2442 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2443 if (idx) {
2444 for (i=0; i<A->rmap->n; i++) {
2445 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2446 }
2447 }
2448
2449 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2450 if (idx) {
2451 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2452 }
2453 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2454 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2455
2456 for (i=0; i<A->rmap->n; i++) {
2457 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2458 va[i] = vb[i];
2459 if (idx) idx[i] = a->garray[idxb[i]];
2460 }
2461 }
2462
2463 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2464 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2465 ierr = PetscFree(idxb);CHKERRQ(ierr);
2466 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2467 PetscFunctionReturn(0);
2468 }
2469
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2472 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2473 {
2474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2475 PetscInt n = A->rmap->n;
2476 PetscInt cstart = A->cmap->rstart;
2477 PetscInt *cmap = mat->garray;
2478 PetscInt *diagIdx, *offdiagIdx;
2479 Vec diagV, offdiagV;
2480 PetscScalar *a, *diagA, *offdiagA;
2481 PetscInt r;
2482 PetscErrorCode ierr;
2483
2484 PetscFunctionBegin;
2485 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2486 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2487 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2488 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2489 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2490 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2491 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2492 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493 for (r = 0; r < n; ++r) {
2494 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2495 a[r] = diagA[r];
2496 idx[r] = cstart + diagIdx[r];
2497 } else {
2498 a[r] = offdiagA[r];
2499 idx[r] = cmap[offdiagIdx[r]];
2500 }
2501 }
2502 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2503 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2504 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2505 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2506 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2507 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2508 PetscFunctionReturn(0);
2509 }
2510
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2513 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2514 {
2515 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2516 PetscInt n = A->rmap->n;
2517 PetscInt cstart = A->cmap->rstart;
2518 PetscInt *cmap = mat->garray;
2519 PetscInt *diagIdx, *offdiagIdx;
2520 Vec diagV, offdiagV;
2521 PetscScalar *a, *diagA, *offdiagA;
2522 PetscInt r;
2523 PetscErrorCode ierr;
2524
2525 PetscFunctionBegin;
2526 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2527 ierr
= VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2528 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2529 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2530 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2531 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2532 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2533 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2534 for (r = 0; r < n; ++r) { 2535 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2536 a[r] = diagA[r]; 2537 idx[r] = cstart + diagIdx[r]; 2538 } else { 2539 a[r] = offdiagA[r]; 2540 idx[r] = cmap[offdiagIdx[r]]; 2541 } 2542 } 2543 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2544 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2545 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2546 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2547 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2548 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2549 PetscFunctionReturn(0); 2550 } 2551 2552 #undef __FUNCT__ 2553 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2554 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2555 { 2556 PetscErrorCode ierr; 2557 Mat *dummy; 2558 2559 PetscFunctionBegin; 2560 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2561 *newmat = *dummy; 2562 ierr = PetscFree(dummy);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 #undef __FUNCT__ 2567 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2568 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2571 PetscErrorCode ierr; 2572 2573 PetscFunctionBegin; 2574 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2581 { 2582 PetscErrorCode ierr; 2583 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2584 2585 PetscFunctionBegin; 2586 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2587 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2588 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2589 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 #undef __FUNCT__ 2594 #define __FUNCT__ "MatShift_MPIAIJ" 2595 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2596 { 2597 PetscErrorCode ierr; 2598 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2599 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data,*bij = (Mat_SeqAIJ*)maij->B->data; 2600 2601 PetscFunctionBegin; 2602 if (!aij->nz && !bij->nz) { 2603 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2604 } 2605 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2606 PetscFunctionReturn(0); 2607 } 2608 2609 /* -------------------------------------------------------------------*/ 2610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2611 MatGetRow_MPIAIJ, 2612 MatRestoreRow_MPIAIJ, 2613 MatMult_MPIAIJ, 2614 /* 4*/ MatMultAdd_MPIAIJ, 2615 MatMultTranspose_MPIAIJ, 2616 MatMultTransposeAdd_MPIAIJ, 2617 #if defined(PETSC_HAVE_PBGL) 2618 MatSolve_MPIAIJ, 2619 #else 2620 0, 2621 #endif 2622 0, 2623 0, 2624 /*10*/ 0, 2625 0, 2626 0, 2627 MatSOR_MPIAIJ, 2628 MatTranspose_MPIAIJ, 2629 /*15*/ MatGetInfo_MPIAIJ, 2630 MatEqual_MPIAIJ, 2631 MatGetDiagonal_MPIAIJ, 2632 MatDiagonalScale_MPIAIJ, 2633 MatNorm_MPIAIJ, 2634 /*20*/ MatAssemblyBegin_MPIAIJ, 2635 
MatAssemblyEnd_MPIAIJ, 2636 MatSetOption_MPIAIJ, 2637 MatZeroEntries_MPIAIJ, 2638 /*24*/ MatZeroRows_MPIAIJ, 2639 0, 2640 #if defined(PETSC_HAVE_PBGL) 2641 0, 2642 #else 2643 0, 2644 #endif 2645 0, 2646 0, 2647 /*29*/ MatSetUp_MPIAIJ, 2648 #if defined(PETSC_HAVE_PBGL) 2649 0, 2650 #else 2651 0, 2652 #endif 2653 0, 2654 0, 2655 0, 2656 /*34*/ MatDuplicate_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*39*/ MatAXPY_MPIAIJ, 2662 MatGetSubMatrices_MPIAIJ, 2663 MatIncreaseOverlap_MPIAIJ, 2664 MatGetValues_MPIAIJ, 2665 MatCopy_MPIAIJ, 2666 /*44*/ MatGetRowMax_MPIAIJ, 2667 MatScale_MPIAIJ, 2668 MatShift_MPIAIJ, 2669 MatDiagonalSet_MPIAIJ, 2670 MatZeroRowsColumns_MPIAIJ, 2671 /*49*/ MatSetRandom_MPIAIJ, 2672 0, 2673 0, 2674 0, 2675 0, 2676 /*54*/ MatFDColoringCreate_MPIXAIJ, 2677 0, 2678 MatSetUnfactored_MPIAIJ, 2679 MatPermute_MPIAIJ, 2680 0, 2681 /*59*/ MatGetSubMatrix_MPIAIJ, 2682 MatDestroy_MPIAIJ, 2683 MatView_MPIAIJ, 2684 0, 2685 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2686 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2687 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2688 0, 2689 0, 2690 0, 2691 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2692 MatGetRowMinAbs_MPIAIJ, 2693 0, 2694 MatSetColoring_MPIAIJ, 2695 0, 2696 MatSetValuesAdifor_MPIAIJ, 2697 /*75*/ MatFDColoringApply_AIJ, 2698 0, 2699 0, 2700 0, 2701 MatFindZeroDiagonals_MPIAIJ, 2702 /*80*/ 0, 2703 0, 2704 0, 2705 /*83*/ MatLoad_MPIAIJ, 2706 0, 2707 0, 2708 0, 2709 0, 2710 0, 2711 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2712 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2713 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2714 MatPtAP_MPIAIJ_MPIAIJ, 2715 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2716 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*99*/ 0, 2722 0, 2723 0, 2724 MatConjugate_MPIAIJ, 2725 0, 2726 /*104*/MatSetValuesRow_MPIAIJ, 2727 MatRealPart_MPIAIJ, 2728 MatImaginaryPart_MPIAIJ, 2729 0, 2730 0, 2731 /*109*/0, 2732 0, 2733 MatGetRowMin_MPIAIJ, 2734 0, 2735 0, 2736 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2737 0, 2738 MatGetGhosts_MPIAIJ, 2739 0, 2740 0, 2741 /*119*/0, 2742 0, 2743 0, 2744 0, 2745 MatGetMultiProcBlock_MPIAIJ, 2746 /*124*/MatFindNonzeroRows_MPIAIJ, 2747 MatGetColumnNorms_MPIAIJ, 2748 MatInvertBlockDiagonal_MPIAIJ, 2749 0, 2750 MatGetSubMatricesMPI_MPIAIJ, 2751 /*129*/0, 2752 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2753 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2754 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2755 0, 2756 /*134*/0, 2757 0, 2758 0, 2759 0, 2760 0, 2761 /*139*/0, 2762 0, 2763 0, 2764 MatFDColoringSetUp_MPIXAIJ, 2765 MatFindOffBlockDiagonalEntries_MPIAIJ, 2766 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2767 }; 2768 2769 /* ----------------------------------------------------------------------------------------*/ 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2773 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2774 { 2775 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2776 PetscErrorCode ierr; 2777 2778 PetscFunctionBegin; 2779 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2780 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2781 PetscFunctionReturn(0); 2782 } 2783 2784 #undef __FUNCT__ 2785 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2786 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2787 { 2788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2793 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 #undef __FUNCT__ 2798 #define __FUNCT__ 
"MatMPIAIJSetPreallocation_MPIAIJ" 2799 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2800 { 2801 Mat_MPIAIJ *b; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2807 b = (Mat_MPIAIJ*)B->data; 2808 2809 if (!B->preallocated) { 2810 /* Explicitly create 2 MATSEQAIJ matrices. */ 2811 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2812 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2813 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2814 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2815 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2816 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2817 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2818 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2819 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2821 } 2822 2823 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2824 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2825 B->preallocated = PETSC_TRUE; 2826 PetscFunctionReturn(0); 2827 } 2828 2829 #undef __FUNCT__ 2830 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2831 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2832 { 2833 Mat mat; 2834 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2835 PetscErrorCode ierr; 2836 2837 PetscFunctionBegin; 2838 *newmat = 0; 2839 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2840 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2841 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2842 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2843 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2844 a = (Mat_MPIAIJ*)mat->data; 2845 2846 mat->factortype = matin->factortype; 2847 mat->assembled = PETSC_TRUE; 2848 mat->insertmode = NOT_SET_VALUES; 2849 mat->preallocated = PETSC_TRUE; 2850 2851 a->size = oldmat->size; 2852 a->rank = oldmat->rank; 2853 a->donotstash = oldmat->donotstash; 2854 a->roworiented = oldmat->roworiented; 2855 a->rowindices = 0; 2856 a->rowvalues = 0; 2857 a->getrowactive = PETSC_FALSE; 2858 2859 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2860 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2861 2862 if (oldmat->colmap) { 2863 #if defined(PETSC_USE_CTABLE) 2864 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2865 #else 2866 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2867 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2868 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2869 #endif 2870 } else a->colmap = 0; 2871 if (oldmat->garray) { 2872 PetscInt len; 2873 len = oldmat->B->cmap->n; 2874 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2875 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2876 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2877 } else a->garray = 0; 2878 2879 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 
2880 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2881 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2883 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2884 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2885 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2887 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2888 *newmat = mat; 2889 PetscFunctionReturn(0); 2890 } 2891 2892 2893 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatLoad_MPIAIJ" 2896 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2897 { 2898 PetscScalar *vals,*svals; 2899 MPI_Comm comm; 2900 PetscErrorCode ierr; 2901 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2902 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2903 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2904 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2905 PetscInt cend,cstart,n,*rowners; 2906 int fd; 2907 PetscInt bs = newMat->rmap->bs; 2908 2909 PetscFunctionBegin; 2910 /* force binary viewer to load .info file if it has not yet done so */ 2911 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2912 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2914 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2915 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2916 if (!rank) { 2917 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2918 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2919 } 2920 2921 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2922 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2923 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2924 if (bs < 0) bs = 1; 2925 2926 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2927 M = header[1]; N = header[2]; 2928 2929 /* If global sizes are set, check if they are consistent with that given in the file */ 2930 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2931 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2932 2933 /* determine ownership of all (block) rows */ 2934 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2935 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2936 else m = newMat->rmap->n; /* Set by user */ 2937 2938 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2939 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2940 2941 /* First process needs enough room for process with most rows */ 2942 if (!rank) { 2943 mmax = rowners[1]; 2944 for (i=2; i<=size; i++) { 2945 mmax = PetscMax(mmax, rowners[i]); 2946 } 2947 } else mmax = -1; /* unused, but compilers complain */ 2948 2949 rowners[0] = 0; 2950 
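/*
   Turn the gathered per-process local sizes into a prefix sum so that
   rowners[p] holds the first global row owned by process p and rowners[size]
   the total row count; the loop starts at i=2 because rowners[1] already
   equals the size of process 0. For example (illustrative values only),
   local sizes {2,3,2} on three processes yield rowners = {0,2,5,7}.
*/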
for (i=2; i<=size; i++) {
2951 rowners[i] += rowners[i-1];
2952 }
2953 rstart = rowners[rank];
2954 rend = rowners[rank+1];
2955
2956 /* distribute row lengths to all processors */
2957 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2958 if (!rank) {
2959 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2960 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2961 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2962 for (j=0; j<m; j++) {
2963 procsnz[0] += ourlens[j];
2964 }
2965 for (i=1; i<size; i++) {
2966 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2967 /* calculate the number of nonzeros on each processor */
2968 for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2969 procsnz[i] += rowlengths[j];
2970 }
2971 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2972 }
2973 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2974 } else {
2975 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2976 }
2977
2978 if (!rank) {
2979 /* determine max buffer needed and allocate it */
2980 maxnz = 0;
2981 for (i=0; i<size; i++) {
2982 maxnz = PetscMax(maxnz,procsnz[i]);
2983 }
2984 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2985
2986 /* read in my part of the matrix column indices */
2987 nz = procsnz[0];
2988 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2989 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2990
2991 /* read in everyone else's part and ship it off */
2992 for (i=1; i<size; i++) {
2993 nz = procsnz[i];
2994 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2995 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2996 }
2997 ierr = PetscFree(cols);CHKERRQ(ierr);
2998 } else {
2999 /* determine buffer space needed for message */
3000 nz = 0;
3001 for (i=0; i<m; i++) {
3002 nz += ourlens[i];
3003 }
3004 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3005
3006 /* receive message of column indices */
3007 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3008 }
3009
3010 /* determine column ownership if matrix is not square */
3011 if (N != M) {
3012 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3013 else n = newMat->cmap->n;
3014 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3015 cstart = cend - n;
3016 } else {
3017 cstart = rstart;
3018 cend = rend;
3019 n = cend - cstart;
3020 }
3021
3022 /* loop over local rows, determining number of off diagonal entries */
3023 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3024 jj = 0;
3025 for (i=0; i<m; i++) {
3026 for (j=0; j<ourlens[i]; j++) {
3027 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3028 jj++;
3029 }
3030 }
3031
3032 for (i=0; i<m; i++) {
3033 ourlens[i] -= offlens[i];
3034 }
3035 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3036
3037 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3038
3039 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3040
3041 for (i=0; i<m; i++) {
3042 ourlens[i] += offlens[i];
3043 }
3044
3045 if (!rank) {
3046 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
3047
3048 /* read in my part of the matrix numerical values */
3049 nz = procsnz[0];
3050 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3051
3052 /* insert into matrix */
3053 jj = rstart;
3054 smycols = mycols;
3055 svals = vals;
3056 for (i=0; i<m; i++) {
3057 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3058
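/* advance the sliding pointers so smycols/svals point at the start of the
   next row's column indices and values */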
smycols += ourlens[i];
3059 svals += ourlens[i];
3060 jj++;
3061 }
3062
3063 /* read in other processors' parts and ship them out */
3064 for (i=1; i<size; i++) {
3065 nz = procsnz[i];
3066 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3067 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3068 }
3069 ierr = PetscFree(procsnz);CHKERRQ(ierr);
3070 } else {
3071 /* receive numeric values */
3072 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
3073
3074 /* receive message of values */
3075 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3076
3077 /* insert into matrix */
3078 jj = rstart;
3079 smycols = mycols;
3080 svals = vals;
3081 for (i=0; i<m; i++) {
3082 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3083 smycols += ourlens[i];
3084 svals += ourlens[i];
3085 jj++;
3086 }
3087 }
3088 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3089 ierr = PetscFree(vals);CHKERRQ(ierr);
3090 ierr = PetscFree(mycols);CHKERRQ(ierr);
3091 ierr = PetscFree(rowners);CHKERRQ(ierr);
3092 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094 PetscFunctionReturn(0);
3095 }
3096
3097 #undef __FUNCT__
3098 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3099 /* TODO: Not scalable because of ISAllGather(). */
3100 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3101 {
3102 PetscErrorCode ierr;
3103 IS iscol_local;
3104 PetscInt csize;
3105
3106 PetscFunctionBegin;
3107 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3108 if (call == MAT_REUSE_MATRIX) {
3109 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3110 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3111 } else {
3112 PetscInt cbs;
3113 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3114 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3115 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3116 }
3117 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3118 if (call == MAT_INITIAL_MATRIX) {
3119 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3120 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3121 }
3122 PetscFunctionReturn(0);
3123 }
3124
3125 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3128 /*
3129 Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
3130 locally, and then the end result by concatenating the local matrices.
3131 Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3132
3133 Note: This requires a sequential iscol with all indices.
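     MatGetSubMatrix_MPIAIJ() above satisfies this requirement by gathering iscol onto
     every process with ISAllGather() and caching the gathered IS on the new matrix for reuse.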
3134 */ 3135 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3136 { 3137 PetscErrorCode ierr; 3138 PetscMPIInt rank,size; 3139 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3140 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3141 PetscBool allcolumns, colflag; 3142 Mat M,Mreuse; 3143 MatScalar *vwork,*aa; 3144 MPI_Comm comm; 3145 Mat_SeqAIJ *aij; 3146 3147 PetscFunctionBegin; 3148 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3149 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3150 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3151 3152 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3153 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3154 if (colflag && ncol == mat->cmap->N) { 3155 allcolumns = PETSC_TRUE; 3156 } else { 3157 allcolumns = PETSC_FALSE; 3158 } 3159 if (call == MAT_REUSE_MATRIX) { 3160 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3161 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3162 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3163 } else { 3164 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3165 } 3166 3167 /* 3168 m - number of local rows 3169 n - number of columns (same on all processors) 3170 rstart - first row in new global matrix generated 3171 */ 3172 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3173 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3174 if (call == MAT_INITIAL_MATRIX) { 3175 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3176 ii = aij->i; 3177 jj = aij->j; 3178 3179 /* 3180 Determine the number of non-zeros in the diagonal and off-diagonal 3181 portions of the matrix in order to do correct preallocation 3182 */ 3183 3184 /* first get start and end of "diagonal" columns */ 3185 if (csize == PETSC_DECIDE) { 3186 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3187 if (mglobal == n) { /* square matrix */ 3188 nlocal = m; 3189 } else { 3190 nlocal = n/size + ((n % size) > rank); 3191 } 3192 } else { 3193 nlocal = csize; 3194 } 3195 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3196 rstart = rend - nlocal; 3197 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3198 3199 /* next, compute all the lengths */ 3200 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3201 olens = dlens + m; 3202 for (i=0; i<m; i++) { 3203 jend = ii[i+1] - ii[i]; 3204 olen = 0; 3205 dlen = 0; 3206 for (j=0; j<jend; j++) { 3207 if (*jj < rstart || *jj >= rend) olen++; 3208 else dlen++; 3209 jj++; 3210 } 3211 olens[i] = olen; 3212 dlens[i] = dlen; 3213 } 3214 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3215 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3216 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3217 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3218 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3219 ierr = PetscFree(dlens);CHKERRQ(ierr); 3220 } else { 3221 PetscInt ml,nl; 3222 3223 M = *newmat; 3224 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3225 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3226 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3227 /* 3228 The next two 
lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3229 rather than the slower MatSetValues().
3230 */
3231 M->was_assembled = PETSC_TRUE;
3232 M->assembled = PETSC_FALSE;
3233 }
3234 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3235 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3236 ii = aij->i;
3237 jj = aij->j;
3238 aa = aij->a;
3239 for (i=0; i<m; i++) {
3240 row = rstart + i;
3241 nz = ii[i+1] - ii[i];
3242 cwork = jj; jj += nz;
3243 vwork = aa; aa += nz;
3244 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3245 }
3246
3247 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3248 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3249 *newmat = M;
3250
3251 /* save submatrix used in processor for next request */
3252 if (call == MAT_INITIAL_MATRIX) {
3253 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3254 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3255 }
3256 PetscFunctionReturn(0);
3257 }
3258
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3261 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3262 {
3263 PetscInt m,cstart, cend,j,nnz,i,d;
3264 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3265 const PetscInt *JJ;
3266 PetscScalar *values;
3267 PetscErrorCode ierr;
3268
3269 PetscFunctionBegin;
3270 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3271
3272 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3273 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3274 m = B->rmap->n;
3275 cstart = B->cmap->rstart;
3276 cend = B->cmap->rend;
3277 rstart = B->rmap->rstart;
3278
3279 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3280
3281 #if defined(PETSC_USE_DEBUG)
3282 for (i=0; i<m; i++) {
3283 nnz = Ii[i+1]- Ii[i];
3284 JJ = J + Ii[i];
3285 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3286 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3287 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3288 }
3289 #endif
3290
3291 for (i=0; i<m; i++) {
3292 nnz = Ii[i+1]- Ii[i];
3293 JJ = J + Ii[i];
3294 nnz_max = PetscMax(nnz_max,nnz);
3295 d = 0;
3296 for (j=0; j<nnz; j++) {
3297 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3298 }
3299 d_nnz[i] = d;
3300 o_nnz[i] = nnz - d;
3301 }
3302 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3303 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3304
3305 if (v) values = (PetscScalar*)v;
3306 else {
3307 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3308 }
3309
3310 for (i=0; i<m; i++) {
3311 ii = i + rstart;
3312 nnz = Ii[i+1]- Ii[i];
3313 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3314 }
3315 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3316 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317
3318 if (!v) {
3319 ierr = PetscFree(values);CHKERRQ(ierr);
3320 }
3321 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3322 PetscFunctionReturn(0);
3323 }
3324
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3327 /*@
3328 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3329 (the default parallel PETSc format).
3330
3331 Collective on MPI_Comm
3332
3333 Input Parameters:
3334 + B - the matrix
3335 . i - the indices into j for the start of each local row (starts with zero)
3336 . j - the column indices for each local row (starts with zero)
3337 - v - optional values in the matrix
3338
3339 Level: developer
3340
3341 Notes:
3342 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3343 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3344 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3345
3346 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3347
3348 The format which is used for the sparse matrix input is equivalent to a
3349 row-major ordering, i.e. for the following matrix, the input data expected is
3350 as shown:
3351
3352 1 0 0
3353 2 0 3 P0
3354 -------
3355 4 5 6 P1
3356
3357 Process0 [P0]: rows_owned=[0,1]
3358 i = {0,1,3} [size = nrow+1 = 2+1]
3359 j = {0,0,2} [size = nz = 3]
3360 v = {1,2,3} [size = nz = 3]
3361
3362 Process1 [P1]: rows_owned=[2]
3363 i = {0,3} [size = nrow+1 = 1+1]
3364 j = {0,1,2} [size = nz = 3]
3365 v = {4,5,6} [size = nz = 3]
3366
3367 .keywords: matrix, aij, compressed row, sparse, parallel
3368
3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3370 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3371 @*/
3372 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3373 {
3374 PetscErrorCode ierr;
3375
3376 PetscFunctionBegin;
3377 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3378 PetscFunctionReturn(0);
3379 }
3380
3381 #undef __FUNCT__
3382 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3383 /*@C
3384 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3385 (the default parallel PETSc format). For good matrix assembly performance
3386 the user should preallocate the matrix storage by setting the parameters
3387 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3388 performance can be increased by more than a factor of 50.
3389
3390 Collective on MPI_Comm
3391
3392 Input Parameters:
3393 + B - the matrix
3394 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3395 (same value is used for all local rows)
3396 . d_nnz - array containing the number of nonzeros in the various rows of the
3397 DIAGONAL portion of the local submatrix (possibly different for each row)
3398 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3399 The size of this array is equal to the number of local rows, i.e. 'm'.
3400 For matrices that will be factored, you must leave room for (and set)
3401 the diagonal entry even if it is zero.
3402 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3403 submatrix (same value is used for all local rows).
3404 - o_nnz - array containing the number of nonzeros in the various rows of the
3405 OFF-DIAGONAL portion of the local submatrix (possibly different for
3406 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3407 structure. The size of this array is equal to the number
3408 of local rows, i.e. 'm'.
3409
3410 If the *_nnz parameter is given then the *_nz parameter is ignored.
3411
3412 The AIJ format (also called the Yale sparse matrix format or
3413 compressed row storage (CSR)), is fully compatible with standard Fortran 77
3414 storage. The stored row and column indices begin with zero.
3415 See Users-Manual: ch_mat for details.
3416
3417 The parallel matrix is partitioned such that the first m0 rows belong to
3418 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3419 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3420
3421 The DIAGONAL portion of the local submatrix of a processor can be defined
3422 as the submatrix which is obtained by extracting the part corresponding to
3423 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3424 first row that belongs to the processor, r2 is the last row belonging to
3425 this processor, and c1-c2 is the range of indices of the local part of a
3426 vector suitable for applying the matrix to. This is an m x n matrix. In the
3427 common case of a square matrix, the row and column ranges are the same and
3428 the DIAGONAL part is also square. The remaining portion of the local
3429 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3430
3431 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3432
3433 You can call MatGetInfo() to get information on how effective the preallocation was;
3434 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3435 You can also run with the option -info and look for messages with the string
3436 malloc in them to see if additional memory allocation was needed.
3437
3438 Example usage:
3439
3440 Consider the following 8x8 matrix with 34 nonzero values, that is
3441 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3442 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3443 as follows:
3444
3445 .vb
3446 1 2 0 | 0 3 0 | 0 4
3447 Proc0 0 5 6 | 7 0 0 | 8 0
3448 9 0 10 | 11 0 0 | 12 0
3449 -------------------------------------
3450 13 0 14 | 15 16 17 | 0 0
3451 Proc1 0 18 0 | 19 20 21 | 0 0
3452 0 0 0 | 22 23 0 | 24 0
3453 -------------------------------------
3454 Proc2 25 26 27 | 0 0 28 | 29 0
3455 30 0 0 | 31 32 33 | 0 34
3456 .ve
3457
3458 This can be represented as a collection of submatrices as:
3459
3460 .vb
3461 A B C
3462 D E F
3463 G H I
3464 .ve
3465
3466 Where the submatrices A,B,C are owned by proc0, D,E,F are
3467 owned by proc1, G,H,I are owned by proc2.
3468
3469 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3470 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3471 The 'M','N' parameters are 8,8, and have the same values on all procs.
3472
3473 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3474 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3475 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
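
   In code, an entry in a locally owned row with global column index j belongs to the
   DIAGONAL block exactly when cstart <= j < cend. A minimal sketch of counting with
   the column ownership range (here obtained from MatGetOwnershipRangeColumn(), and
   assuming rstart is the first locally owned row, from MatGetOwnershipRange()):
.vb
      PetscInt cstart,cend;
      MatGetOwnershipRangeColumn(B,&cstart,&cend);
      if (cstart <= j && j < cend) d_nnz[i-rstart]++;
      else                         o_nnz[i-rstart]++;
.ve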
3476 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3477 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3478 matrix and [DF] as another SeqAIJ matrix.
3479
3480 When d_nz, o_nz parameters are specified, d_nz storage elements are
3481 allocated for every row of the local diagonal submatrix, and o_nz
3482 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3483 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3484 the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3485 In this case, the values of d_nz,o_nz are:
3486 .vb
3487 proc0 : d_nz = 2, o_nz = 2
3488 proc1 : d_nz = 3, o_nz = 2
3489 proc2 : d_nz = 1, o_nz = 4
3490 .ve
3491 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3492 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3493 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3494 34 values.
3495
3496 When d_nnz, o_nnz parameters are specified, the storage is specified
3497 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3498 In the above case the values for d_nnz,o_nnz are:
3499 .vb
3500 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3501 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3502 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3503 .ve
3504 Here the space allocated is the sum of all the above values, i.e. 34, and
3505 hence the preallocation is perfect.
3506
3507 Level: intermediate
3508
3509 .keywords: matrix, aij, compressed row, sparse, parallel
3510
3511 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3512 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3513 @*/
3514 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3515 {
3516 PetscErrorCode ierr;
3517
3518 PetscFunctionBegin;
3519 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3520 PetscValidType(B,1);
3521 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3522 PetscFunctionReturn(0);
3523 }
3524
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3527 /*@
3528 MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
3529 in standard CSR format.
3530
3531 Collective on MPI_Comm
3532
3533 Input Parameters:
3534 + comm - MPI communicator
3535 . m - number of local rows (Cannot be PETSC_DECIDE)
3536 . n - This value should be the same as the local size used in creating the
3537 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3538 calculated if N is given). For square matrices n is almost always m.
3539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3541 . i - row indices
3542 . j - column indices
3543 - a - matrix values
3544
3545 Output Parameter:
3546 . mat - the matrix
3547
3548 Level: intermediate
3549
3550 Notes:
3551 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3552 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3553 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3554
3555 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
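
   For instance, with the two-process layout shown below, each process could build
   the 3x3 matrix as follows (a sketch; nlocal denotes the local row count, 2 on P0 and 1 on P1):
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,nlocal,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve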
3556
3557 The format which is used for the sparse matrix input is equivalent to a
3558 row-major ordering, i.e. for the following matrix, the input data expected is
3559 as shown:
3560
3561 1 0 0
3562 2 0 3 P0
3563 -------
3564 4 5 6 P1
3565
3566 Process0 [P0]: rows_owned=[0,1]
3567 i = {0,1,3} [size = nrow+1 = 2+1]
3568 j = {0,0,2} [size = nz = 3]
3569 v = {1,2,3} [size = nz = 3]
3570
3571 Process1 [P1]: rows_owned=[2]
3572 i = {0,3} [size = nrow+1 = 1+1]
3573 j = {0,1,2} [size = nz = 3]
3574 v = {4,5,6} [size = nz = 3]
3575
3576 .keywords: matrix, aij, compressed row, sparse, parallel
3577
3578 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3579 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3580 @*/
3581 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3582 {
3583 PetscErrorCode ierr;
3584
3585 PetscFunctionBegin;
3586 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3587 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3588 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3589 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3590 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3591 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3592 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3593 PetscFunctionReturn(0);
3594 }
3595
3596 #undef __FUNCT__
3597 #define __FUNCT__ "MatCreateAIJ"
3598 /*@C
3599 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3600 (the default parallel PETSc format). For good matrix assembly performance
3601 the user should preallocate the matrix storage by setting the parameters
3602 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3603 performance can be increased by more than a factor of 50.
3604
3605 Collective on MPI_Comm
3606
3607 Input Parameters:
3608 + comm - MPI communicator
3609 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3610 This value should be the same as the local size used in creating the
3611 y vector for the matrix-vector product y = Ax.
3612 . n - This value should be the same as the local size used in creating the
3613 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3614 calculated if N is given). For square matrices n is almost always m.
3615 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3616 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3617 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3618 (same value is used for all local rows)
3619 . d_nnz - array containing the number of nonzeros in the various rows of the
3620 DIAGONAL portion of the local submatrix (possibly different for each row)
3621 or NULL, if d_nz is used to specify the nonzero structure.
3622 The size of this array is equal to the number of local rows, i.e. 'm'.
3623 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3624 submatrix (same value is used for all local rows).
3625 - o_nnz - array containing the number of nonzeros in the various rows of the
3626 OFF-DIAGONAL portion of the local submatrix (possibly different for
3627 each row) or NULL, if o_nz is used to specify the nonzero
3628 structure.
The size of this array is equal to the number
3629 of local rows, i.e. 'm'.
3630
3631 Output Parameter:
3632 . A - the matrix
3633
3634 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3635 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3636 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3637
3638 Notes:
3639 If the *_nnz parameter is given then the *_nz parameter is ignored.
3640
3641 m,n,M,N parameters specify the size of the matrix, and its partitioning across
3642 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3643 storage requirements for this matrix.
3644
3645 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3646 processor then it must be used on all processors that share the object for
3647 that argument.
3648
3649 The user MUST specify either the local or global matrix dimensions
3650 (possibly both).
3651
3652 The parallel matrix is partitioned across processors such that the
3653 first m0 rows belong to process 0, the next m1 rows belong to
3654 process 1, the next m2 rows belong to process 2, etc., where
3655 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3656 values corresponding to an [m x N] submatrix.
3657
3658 The columns are logically partitioned with the n0 columns belonging
3659 to the 0th partition, the next n1 columns belonging to the next
3660 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3661
3662 The DIAGONAL portion of the local submatrix on any given processor
3663 is the submatrix corresponding to the rows and columns m,n owned by
3664 the given processor, i.e. the diagonal submatrix on
3665 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
3666 etc. The remaining portion of the local submatrix [m x (N-n)]
3667 constitutes the OFF-DIAGONAL portion. The example below better
3668 illustrates this concept.
3669
3670 For a square global matrix we define each processor's diagonal portion
3671 to be its local rows and the corresponding columns (a square submatrix);
3672 each processor's off-diagonal portion encompasses the remainder of the
3673 local matrix (a rectangular submatrix).
3674
3675 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3676
3677 When calling this routine with a single process communicator, a matrix of
3678 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3679 type of communicator, use the construction mechanism:
3680 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3681
3682 By default, this format uses inodes (identical nodes) when possible.
3683 We search for consecutive rows with the same nonzero structure, thereby
3684 reusing matrix information to achieve increased efficiency.
3685
3686 Options Database Keys:
3687 + -mat_no_inode - Do not use inodes
3688 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3689 - -mat_aij_oneindex - Internally use indexing starting at 1
3690 rather than 0. Note that when calling MatSetValues(),
3691 the user still MUST index entries starting at 0!
3692
3693
3694 Example usage:
3695
3696 Consider the following 8x8 matrix with 34 nonzero values, that is
3697 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3698 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3699 as follows:
3700
3701 .vb
3702 1 2 0 | 0 3 0 | 0 4
3703 Proc0 0 5 6 | 7 0 0 | 8 0
3704 9 0 10 | 11 0 0 | 12 0
3705 -------------------------------------
3706 13 0 14 | 15 16 17 | 0 0
3707 Proc1 0 18 0 | 19 20 21 | 0 0
3708 0 0 0 | 22 23 0 | 24 0
3709 -------------------------------------
3710 Proc2 25 26 27 | 0 0 28 | 29 0
3711 30 0 0 | 31 32 33 | 0 34
3712 .ve
3713
3714 This can be represented as a collection of submatrices as:
3715
3716 .vb
3717 A B C
3718 D E F
3719 G H I
3720 .ve
3721
3722 Where the submatrices A,B,C are owned by proc0, D,E,F are
3723 owned by proc1, G,H,I are owned by proc2.
3724
3725 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3726 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3727 The 'M','N' parameters are 8,8, and have the same values on all procs.
3728
3729 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3730 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3731 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3732 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3733 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
3734 matrix and [DF] as another SeqAIJ matrix.
3735
3736 When d_nz, o_nz parameters are specified, d_nz storage elements are
3737 allocated for every row of the local diagonal submatrix, and o_nz
3738 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3739 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3740 the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3741 In this case, the values of d_nz,o_nz are:
3742 .vb
3743 proc0 : d_nz = 2, o_nz = 2
3744 proc1 : d_nz = 3, o_nz = 2
3745 proc2 : d_nz = 1, o_nz = 4
3746 .ve
3747 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3748 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3749 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3750 34 values.
3751
3752 When d_nnz, o_nnz parameters are specified, the storage is specified
3753 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3754 In the above case the values for d_nnz,o_nnz are:
3755 .vb
3756 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3757 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3758 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3759 .ve
3760 Here the space allocated is the sum of all the above values, i.e. 34, and
3761 hence the preallocation is perfect.
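
   Using the example above, proc0 could therefore create its share of the matrix with
   the call below (a sketch; proc1 and proc2 make the matching call with their own
   local sizes and arrays):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve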
3762 3763 Level: intermediate 3764 3765 .keywords: matrix, aij, compressed row, sparse, parallel 3766 3767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3768 MPIAIJ, MatCreateMPIAIJWithArrays() 3769 @*/ 3770 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3771 { 3772 PetscErrorCode ierr; 3773 PetscMPIInt size; 3774 3775 PetscFunctionBegin; 3776 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3777 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3778 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3779 if (size > 1) { 3780 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3781 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3782 } else { 3783 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3784 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3785 } 3786 PetscFunctionReturn(0); 3787 } 3788 3789 #undef __FUNCT__ 3790 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3791 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3792 { 3793 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3794 3795 PetscFunctionBegin; 3796 if (Ad) *Ad = a->A; 3797 if (Ao) *Ao = a->B; 3798 if (colmap) *colmap = a->garray; 3799 PetscFunctionReturn(0); 3800 } 3801 3802 #undef __FUNCT__ 3803 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3804 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3805 { 3806 PetscErrorCode ierr; 3807 PetscInt i; 3808 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3809 3810 PetscFunctionBegin; 3811 if (coloring->ctype == IS_COLORING_GLOBAL) { 3812 ISColoringValue *allcolors,*colors; 3813 ISColoring ocoloring; 3814 3815 /* set coloring for diagonal portion */ 3816 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3817 3818 /* set coloring for off-diagonal portion */ 3819 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3820 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3821 for (i=0; i<a->B->cmap->n; i++) { 3822 colors[i] = allcolors[a->garray[i]]; 3823 } 3824 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3825 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3826 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3827 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3828 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3829 ISColoringValue *colors; 3830 PetscInt *larray; 3831 ISColoring ocoloring; 3832 3833 /* set coloring for diagonal portion */ 3834 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3835 for (i=0; i<a->A->cmap->n; i++) { 3836 larray[i] = i + A->cmap->rstart; 3837 } 3838 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3839 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3840 for (i=0; i<a->A->cmap->n; i++) { 3841 colors[i] = coloring->colors[larray[i]]; 3842 } 3843 ierr = PetscFree(larray);CHKERRQ(ierr); 3844 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3845 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3846 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3847 3848 /* set coloring for off-diagonal portion */ 3849 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3850 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3851 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3852 for (i=0; i<a->B->cmap->n; i++) { 3853 colors[i] = coloring->colors[larray[i]]; 3854 } 3855 ierr = PetscFree(larray);CHKERRQ(ierr); 3856 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3857 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3858 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3859 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3860 PetscFunctionReturn(0); 3861 } 3862 3863 #undef __FUNCT__ 3864 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3865 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3866 { 3867 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3868 PetscErrorCode ierr; 3869 3870 PetscFunctionBegin; 3871 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3872 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3873 PetscFunctionReturn(0); 3874 } 3875 3876 #undef __FUNCT__ 3877 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3878 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3879 { 3880 PetscErrorCode ierr; 3881 PetscInt m,N,i,rstart,nnz,Ii; 3882 PetscInt *indx; 3883 PetscScalar *values; 3884 3885 PetscFunctionBegin; 3886 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3887 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3888 PetscInt *dnz,*onz,sum,bs,cbs; 3889 3890 if (n == PETSC_DECIDE) { 3891 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3892 } 3893 /* Check sum(n) = N */ 3894 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3895 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3896 3897 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3898 rstart -= m; 3899 3900 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3901 for (i=0; i<m; i++) { 3902 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3903 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3904 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3905 } 3906 3907 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3908 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3909 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3910 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3911 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3912 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3913 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3914 } 3915 3916 /* numeric phase */ 3917 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3918 for (i=0; i<m; i++) { 3919 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3920 Ii = i + rstart; 3921 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3922 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3923 } 3924 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3925 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3926 PetscFunctionReturn(0); 3927 } 3928 3929 #undef __FUNCT__ 3930 #define __FUNCT__ "MatFileSplit" 3931 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3932 { 3933 PetscErrorCode 
ierr; 3934 PetscMPIInt rank; 3935 PetscInt m,N,i,rstart,nnz; 3936 size_t len; 3937 const PetscInt *indx; 3938 PetscViewer out; 3939 char *name; 3940 Mat B; 3941 const PetscScalar *values; 3942 3943 PetscFunctionBegin; 3944 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3945 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3946 /* Should this be the type of the diagonal block of A? */ 3947 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3948 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3949 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3950 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3951 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3952 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3953 for (i=0; i<m; i++) { 3954 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3955 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3956 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3957 } 3958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3960 3961 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3962 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3963 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3964 sprintf(name,"%s.%d",outfile,rank); 3965 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3966 ierr = PetscFree(name);CHKERRQ(ierr); 3967 ierr = MatView(B,out);CHKERRQ(ierr); 3968 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3969 ierr = MatDestroy(&B);CHKERRQ(ierr); 3970 PetscFunctionReturn(0); 3971 } 3972 3973 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3974 #undef __FUNCT__ 3975 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3976 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3977 { 3978 PetscErrorCode ierr; 3979 Mat_Merge_SeqsToMPI *merge; 3980 PetscContainer container; 3981 3982 PetscFunctionBegin; 3983 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3984 if (container) { 3985 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3987 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3988 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3989 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3990 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3991 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3992 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3993 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3994 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3995 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3996 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3997 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3998 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3999 ierr = PetscFree(merge);CHKERRQ(ierr); 4000 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4001 } 4002 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4003 PetscFunctionReturn(0); 4004 } 4005 4006 #include <../src/mat/utils/freespace.h> 4007 #include <petscbt.h> 4008 4009 #undef __FUNCT__ 4010 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4011 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4012 { 4013 PetscErrorCode ierr; 4014 MPI_Comm comm; 4015 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4016 PetscMPIInt size,rank,taga,*len_s; 4017 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4018 PetscInt proc,m; 4019 PetscInt **buf_ri,**buf_rj; 4020 
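/* note: merge->buf_ri[k]/buf_rj[k] hold the i- and j-structure received from the k-th
   sender during the symbolic phase; abuf_r[] below receives the matching numerical values */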
PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4021 PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4022 MPI_Request *s_waits,*r_waits;
4023 MPI_Status *status;
4024 MatScalar *aa=a->a;
4025 MatScalar **abuf_r,*ba_i;
4026 Mat_Merge_SeqsToMPI *merge;
4027 PetscContainer container;
4028
4029 PetscFunctionBegin;
4030 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4031 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4032
4033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4034 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4035
4036 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4037 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4038
4039 bi = merge->bi;
4040 bj = merge->bj;
4041 buf_ri = merge->buf_ri;
4042 buf_rj = merge->buf_rj;
4043
4044 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4045 owners = merge->rowmap->range;
4046 len_s = merge->len_s;
4047
4048 /* send and recv matrix values */
4049 /*-----------------------------*/
4050 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4051 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4052
4053 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4054 for (proc=0,k=0; proc<size; proc++) {
4055 if (!len_s[proc]) continue;
4056 i = owners[proc];
4057 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4058 k++;
4059 }
4060
4061 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4062 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4063 ierr = PetscFree(status);CHKERRQ(ierr);
4064
4065 ierr = PetscFree(s_waits);CHKERRQ(ierr);
4066 ierr = PetscFree(r_waits);CHKERRQ(ierr);
4067
4068 /* insert mat values of mpimat */
4069 /*----------------------------*/
4070 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4071 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4072
4073 for (k=0; k<merge->nrecv; k++) {
4074 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4075 nrows = *(buf_ri_k[k]);
4076 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
4077 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4078 }
4079
4080 /* set values of ba */
4081 m = merge->rowmap->n;
4082 for (i=0; i<m; i++) {
4083 arow = owners[rank] + i;
4084 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4085 bnzi = bi[i+1] - bi[i];
4086 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4087
4088 /* add local non-zero vals of this proc's seqmat into ba */
4089 anzi = ai[arow+1] - ai[arow];
4090 aj = a->j + ai[arow];
4091 aa = a->a + ai[arow];
4092 nextaj = 0;
4093 for (j=0; nextaj<anzi; j++) {
4094 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4095 ba_i[j] += aa[nextaj++];
4096 }
4097 }
4098
4099 /* add received vals into ba */
4100 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4101 /* i-th row */
4102 if (i == *nextrow[k]) {
4103 anzi = *(nextai[k]+1) - *nextai[k];
4104 aj = buf_rj[k] + *(nextai[k]);
4105 aa = abuf_r[k] + *(nextai[k]);
4106 nextaj = 0;
4107 for (j=0; nextaj<anzi; j++) {
4108 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4109 ba_i[j] += aa[nextaj++];
4110 }
4111 }
4112 nextrow[k]++; nextai[k]++;
4113 }
4114 }
4115 ierr =
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4116 }
4117 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4118 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4119
4120 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4121 ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4122 ierr = PetscFree(ba_i);CHKERRQ(ierr);
4123 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4124 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4125 PetscFunctionReturn(0);
4126 }
4127
4128 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4129
4130 #undef __FUNCT__
4131 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4132 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4133 {
4134 PetscErrorCode ierr;
4135 Mat B_mpi;
4136 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4137 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4138 PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4139 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4140 PetscInt len,proc,*dnz,*onz,bs,cbs;
4141 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4142 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4143 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4144 MPI_Status *status;
4145 PetscFreeSpaceList free_space=NULL,current_space=NULL;
4146 PetscBT lnkbt;
4147 Mat_Merge_SeqsToMPI *merge;
4148 PetscContainer container;
4149
4150 PetscFunctionBegin;
4151 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4152
4153 /* make sure it is a PETSc comm */
4154 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4155 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4156 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4157
4158 ierr = PetscNew(&merge);CHKERRQ(ierr);
4159 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4160
4161 /* determine row ownership */
4162 /*---------------------------------------------------------*/
4163 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4164 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4165 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4166 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4167 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4168 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4169 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4170
4171 m = merge->rowmap->n;
4172 owners = merge->rowmap->range;
4173
4174 /* determine the number of messages to send, their lengths */
4175 /*---------------------------------------------------------*/
4176 len_s = merge->len_s;
4177
4178 len = 0; /* length of buf_si[] */
4179 merge->nsend = 0;
4180 for (proc=0; proc<size; proc++) {
4181 len_si[proc] = 0;
4182 if (proc == rank) {
4183 len_s[proc] = 0;
4184 } else {
4185 len_si[proc] = owners[proc+1] - owners[proc] + 1;
4186 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
4187 }
4188 if (len_s[proc]) {
4189 merge->nsend++;
4190 nrows = 0;
4191 for (i=owners[proc]; i<owners[proc+1]; i++) {
4192 if (ai[i+1] > ai[i]) nrows++;
4193 }
4194 len_si[proc] = 2*(nrows+1);
4195 len += len_si[proc];
4196 }
4197 }
4198
4199 /* determine the number and length of messages to receive for ij-structure */
4200 /*-------------------------------------------------------------------------*/
4201 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4202 ierr =
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4203
4204 /* post the Irecv of j-structure */
4205 /*-------------------------------*/
4206 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4207 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4208
4209 /* post the Isend of j-structure */
4210 /*--------------------------------*/
4211 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4212
4213 for (proc=0, k=0; proc<size; proc++) {
4214 if (!len_s[proc]) continue;
4215 i = owners[proc];
4216 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4217 k++;
4218 }
4219
4220 /* receives and sends of j-structure are complete */
4221 /*------------------------------------------------*/
4222 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4223 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4224
4225 /* send and recv i-structure */
4226 /*---------------------------*/
4227 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4228 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4229
4230 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4231 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4232 for (proc=0,k=0; proc<size; proc++) {
4233 if (!len_s[proc]) continue;
4234 /* form outgoing message for i-structure:
4235 buf_si[0]: nrows to be sent
4236 [1:nrows]: row index (local to the receiving process)
4237 [nrows+1:2*nrows+1]: i-structure index
4238 */
4239 /*-------------------------------------------*/
4240 nrows = len_si[proc]/2 - 1;
4241 buf_si_i = buf_si + nrows+1;
4242 buf_si[0] = nrows;
4243 buf_si_i[0] = 0;
4244 nrows = 0;
4245 for (i=owners[proc]; i<owners[proc+1]; i++) {
4246 anzi = ai[i+1] - ai[i];
4247 if (anzi) {
4248 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4249 buf_si[nrows+1] = i-owners[proc]; /* local row index */
4250 nrows++;
4251 }
4252 }
4253 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4254 k++;
4255 buf_si += len_si[proc];
4256 }
4257
4258 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4259 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4260
4261 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4262 for (i=0; i<merge->nrecv; i++) {
4263 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4264 }
4265
4266 ierr = PetscFree(len_si);CHKERRQ(ierr);
4267 ierr = PetscFree(len_ri);CHKERRQ(ierr);
4268 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4269 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4270 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4271 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4272 ierr = PetscFree(status);CHKERRQ(ierr);
4273
4274 /* compute a local seq matrix in each processor */
4275 /*----------------------------------------------*/
4276 /* allocate bi array and free space for accumulating nonzero column info */
4277 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4278 bi[0] = 0;
4279
4280 /* create and initialize a linked list */
4281 nlnk = N+1;
4282 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4283
4284 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4285 len = ai[owners[rank+1]] -
ai[owners[rank]];
4286 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4287
4288 current_space = free_space;
4289
4290 /* determine symbolic info for each local row */
4291 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4292
4293 for (k=0; k<merge->nrecv; k++) {
4294 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4295 nrows = *buf_ri_k[k];
4296 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4297 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4298 }
4299
4300 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4301 len = 0;
4302 for (i=0; i<m; i++) {
4303 bnzi = 0;
4304 /* add local non-zero cols of this proc's seqmat into lnk */
4305 arow = owners[rank] + i;
4306 anzi = ai[arow+1] - ai[arow];
4307 aj = a->j + ai[arow];
4308 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4309 bnzi += nlnk;
4310 /* add received col data into lnk */
4311 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4312 if (i == *nextrow[k]) { /* i-th row */
4313 anzi = *(nextai[k]+1) - *nextai[k];
4314 aj = buf_rj[k] + *nextai[k];
4315 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4316 bnzi += nlnk;
4317 nextrow[k]++; nextai[k]++;
4318 }
4319 }
4320 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4321
4322 /* if free space is not available, make more free space */
4323 if (current_space->local_remaining<bnzi) {
4324 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4325 nspacedouble++;
4326 }
4327 /* copy data into free space, then initialize lnk */
4328 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4329 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4330
4331 current_space->array += bnzi;
4332 current_space->local_used += bnzi;
4333 current_space->local_remaining -= bnzi;
4334
4335 bi[i+1] = bi[i] + bnzi;
4336 }
4337
4338 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4339
4340 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4341 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4342 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4343
4344 /* create symbolic parallel matrix B_mpi */
4345 /*---------------------------------------*/
4346 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4347 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4348 if (n==PETSC_DECIDE) {
4349 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4350 } else {
4351 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4352 }
4353 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4354 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4355 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4356 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4357 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4358
4359 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4360 B_mpi->assembled = PETSC_FALSE;
4361 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4362 merge->bi = bi;
4363 merge->bj = bj;
4364 merge->buf_ri = buf_ri;
4365 merge->buf_rj = buf_rj;
4366 merge->coi = NULL;
4367 merge->coj = NULL;
4368 merge->owners_co = NULL;
4369
4370 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4371
4372 /* attach the supporting struct to B_mpi for reuse
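      by MatCreateMPIAIJSumSeqAIJNumeric(), which retrieves it with PetscObjectQuery()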
*/
4373 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4374 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4375 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4376 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4377 *mpimat = B_mpi;
4378
4379 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4380 PetscFunctionReturn(0);
4381 }
4382
4383 #undef __FUNCT__
4384 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4385 /*@C
4386 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together sequential
4387 matrices from each processor
4388
4389 Collective on MPI_Comm
4390
4391 Input Parameters:
4392 + comm - the communicator the parallel matrix will live on
4393 . seqmat - the input sequential matrix on each process
4394 . m - number of local rows (or PETSC_DECIDE)
4395 . n - number of local columns (or PETSC_DECIDE)
4396 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4397
4398 Output Parameter:
4399 . mpimat - the parallel matrix generated
4400
4401 Level: advanced
4402
4403 Notes:
4404 The dimensions of the sequential matrix in each processor MUST be the same.
4405 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4406 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4407 @*/
4408 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4409 {
4410 PetscErrorCode ierr;
4411 PetscMPIInt size;
4412
4413 PetscFunctionBegin;
4414 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415 if (size == 1) {
4416 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4417 if (scall == MAT_INITIAL_MATRIX) {
4418 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4419 } else {
4420 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4421 }
4422 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4423 PetscFunctionReturn(0);
4424 }
4425 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4426 if (scall == MAT_INITIAL_MATRIX) {
4427 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4428 }
4429 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4430 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4431 PetscFunctionReturn(0);
4432 }
4433
4434 #undef __FUNCT__
4435 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4436 /*@
4437 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4438 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4439 with MatGetSize()
4440
4441 Not Collective
4442
4443 Input Parameters:
4444 + A - the matrix
4445 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4446
4447 Output Parameter:
4448 .

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
     columns of local A

    Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B to extract
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.  startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-  B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing A*B directly is untenable

            n               p               p
          (   )           (   )           (   )
        m ( A )    *    n ( B )    =    m ( C )
          (   )           (   )           (   )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
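  /* Ct = Bt*At = (A*B)', so the final transpose below recovers C = A*B */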
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
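
   Notes:
   A typical creation sequence, sketched with placeholder global sizes M,N and
   placeholder preallocation counts (5 diagonal and 2 off-diagonal nonzeros per row):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
.ve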

   Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" and
"off-diagonal" part of the matrix in CSR format. 5118 5119 Collective on MPI_Comm 5120 5121 Input Parameters: 5122 + comm - MPI communicator 5123 . m - number of local rows (Cannot be PETSC_DECIDE) 5124 . n - This value should be the same as the local size used in creating the 5125 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5126 calculated if N is given) For square matrices n is almost always m. 5127 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5128 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5129 . i - row indices for "diagonal" portion of matrix 5130 . j - column indices 5131 . a - matrix values 5132 . oi - row indices for "off-diagonal" portion of matrix 5133 . oj - column indices 5134 - oa - matrix values 5135 5136 Output Parameter: 5137 . mat - the matrix 5138 5139 Level: advanced 5140 5141 Notes: 5142 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5143 must free the arrays once the matrix has been destroyed and not before. 5144 5145 The i and j indices are 0 based 5146 5147 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5148 5149 This sets local rows and cannot be used to set off-processor values. 5150 5151 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5152 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5153 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5154 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5155 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5156 communication if it is known that only local entries will be set. 

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa   = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B     = aij->B;
    Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba   = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}