1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = NULL; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if (type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRMPI(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRMPI(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRMPI(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRMPI(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRMPI(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRMPI(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRMPI(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRMPI(ierr); 393 } 394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to access. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; \ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_DEVICE) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_DEVICE) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_DEVICE) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 658 } else { 659 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 660 } 661 } 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 /* 668 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 669 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 670 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 671 */ 672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 673 { 674 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 675 Mat A = aij->A; /* diagonal part of the matrix */ 676 Mat B = aij->B; /* offdiagonal part of the matrix */ 677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 678 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 679 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 680 PetscInt *ailen = a->ilen,*aj = a->j; 681 PetscInt *bilen = b->ilen,*bj = b->j; 682 PetscInt am = aij->A->rmap->n,j; 683 PetscInt diag_so_far = 0,dnz; 684 PetscInt offd_so_far = 0,onz; 685 686 PetscFunctionBegin; 687 /* Iterate over all rows of the matrix */ 688 for (j=0; j<am; j++) { 689 dnz = onz = 0; 690 /* Iterate over all non-zero columns of the current row */ 691 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 692 /* If column is in the diagonal */ 693 if (mat_j[col] >= cstart && mat_j[col] < cend) { 694 aj[diag_so_far++] = mat_j[col] - cstart; 695 dnz++; 696 } else { /* off-diagonal entries */ 697 bj[offd_so_far++] = mat_j[col]; 698 onz++; 699 } 700 } 701 ailen[j] = dnz; 702 bilen[j] = onz; 703 } 704 PetscFunctionReturn(0); 705 } 706 707 /* 708 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 709 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 710 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 711 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 712 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 713 */ 714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 715 { 716 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 717 Mat A = aij->A; /* diagonal part of the matrix */ 718 Mat B = aij->B; /* offdiagonal part of the matrix */ 719 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 720 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 721 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 722 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 723 PetscInt *ailen = a->ilen,*aj = a->j; 724 PetscInt *bilen = b->ilen,*bj = b->j; 725 PetscInt am = aij->A->rmap->n,j; 726 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 796 { 797 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 798 PetscErrorCode ierr; 799 PetscInt nstash,reallocs; 800 801 PetscFunctionBegin; 802 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 803 804 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 805 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 806 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 807 PetscFunctionReturn(0); 808 } 809 810 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 811 { 812 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 813 PetscErrorCode ierr; 814 PetscMPIInt n; 815 PetscInt i,j,rstart,ncols,flg; 816 PetscInt *row,*col; 817 PetscBool other_disassembled; 818 PetscScalar *val; 819 820 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 821 822 PetscFunctionBegin; 823 if (!aij->donotstash && !mat->nooffprocentries) { 824 while (1) { 825 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 826 if (!flg) break; 827 828 for (i=0; i<n;) { 829 /* Now identify the consecutive vals belonging to the same row */ 830 for (j=i,rstart=row[j]; j<n; j++) { 831 if (row[j] != rstart) break; 832 } 833 if (j < n) ncols = j-i; 834 else ncols = n-i; 835 /* Now assemble all these values with a single function call */ 836 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 837 i = j; 838 } 839 } 840 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 841 } 842 #if defined(PETSC_HAVE_DEVICE) 843 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 844 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 845 if (mat->boundtocpu) { 846 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 847 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 848 } 849 #endif 850 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 851 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 852 853 /* determine if any processor has disassembled, if so we must 854 also disassemble ourself, in order that we may reassemble. */ 855 /* 856 if nonzero structure of submatrix B cannot change then we know that 857 no processor disassembled thus we can skip this stuff 858 */ 859 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 860 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 861 if (mat->was_assembled && !other_disassembled) { 862 #if defined(PETSC_HAVE_DEVICE) 863 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 864 #endif 865 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 866 } 867 } 868 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 869 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 870 } 871 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 872 #if defined(PETSC_HAVE_DEVICE) 873 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 874 #endif 875 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 876 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 877 878 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 879 880 aij->rowvalues = NULL; 881 882 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 883 884 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 885 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 886 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 887 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 888 } 889 #if defined(PETSC_HAVE_DEVICE) 890 mat->offloadmask = PETSC_OFFLOAD_BOTH; 891 #endif 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 896 { 897 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 902 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 909 PetscObjectState sA, sB; 910 PetscInt *lrows; 911 PetscInt r, len; 912 PetscBool cong, lch, gch; 913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 /* get locally owned rows */ 917 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 918 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 919 /* fix right hand side if needed */ 920 if (x && b) { 921 const PetscScalar *xx; 922 PetscScalar *bb; 923 924 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 925 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 926 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 927 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 928 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 929 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 930 } 931 932 sA = mat->A->nonzerostate; 933 sB = mat->B->nonzerostate; 934 935 if (diag != 0.0 && cong) { 936 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 937 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 938 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 939 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 940 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 941 PetscInt nnwA, nnwB; 942 PetscBool nnzA, nnzB; 943 944 nnwA = aijA->nonew; 945 nnwB = aijB->nonew; 946 nnzA = aijA->keepnonzeropattern; 947 nnzB = aijB->keepnonzeropattern; 948 if (!nnzA) { 949 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 950 aijA->nonew = 0; 951 } 952 if (!nnzB) { 953 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 954 aijB->nonew = 0; 955 } 956 /* Must zero here before the next loop */ 957 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 959 for (r = 0; r < len; ++r) { 960 const PetscInt row = lrows[r] + A->rmap->rstart; 961 if (row >= A->cmap->N) continue; 962 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 963 } 964 aijA->nonew = nnwA; 965 aijB->nonew = nnwB; 966 } else { 967 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 968 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 969 } 970 ierr = PetscFree(lrows);CHKERRQ(ierr); 971 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 973 974 /* reduce nonzerostate */ 975 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 976 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 977 if (gch) A->nonzerostate++; 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 982 { 983 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscMPIInt n = A->rmap->n; 986 PetscInt i,j,r,m,len = 0; 987 PetscInt *lrows,*owners = A->rmap->range; 988 PetscMPIInt p = 0; 989 PetscSFNode *rrows; 990 PetscSF sf; 991 const PetscScalar *xx; 992 PetscScalar *bb,*mask; 993 Vec xmask,lmask; 994 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 995 const PetscInt *aj, *ii,*ridx; 996 PetscScalar *aa; 997 998 PetscFunctionBegin; 999 /* Create SF where leaves are input rows and roots are owned rows */ 1000 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1001 for (r = 0; r < n; ++r) lrows[r] = -1; 1002 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1003 for (r = 0; r < N; ++r) { 1004 const PetscInt idx = rows[r]; 1005 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1006 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1007 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1008 } 1009 rrows[r].rank = p; 1010 rrows[r].index = rows[r] - owners[p]; 1011 } 1012 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1013 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1014 /* Collect flags for rows to be zeroed */ 1015 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1016 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1017 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1018 /* Compress and put in row numbers */ 1019 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1020 /* zero diagonal part of matrix */ 1021 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1032 PetscBool cong; 1033 1034 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1035 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1036 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1039 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1040 } 1041 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1042 /* remove zeroed rows of off diagonal matrix */ 1043 ii = aij->i; 1044 for (i=0; i<len; i++) { 1045 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1046 } 1047 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1048 if (aij->compressedrow.use) { 1049 m = aij->compressedrow.nrows; 1050 ii = aij->compressedrow.i; 1051 ridx = aij->compressedrow.rindex; 1052 for (i=0; i<m; i++) { 1053 n = ii[i+1] - ii[i]; 1054 aj = aij->j + ii[i]; 1055 aa = aij->a + ii[i]; 1056 1057 for (j=0; j<n; j++) { 1058 if (PetscAbsScalar(mask[*aj])) { 1059 if (b) bb[*ridx] -= *aa*xx[*aj]; 1060 *aa = 0.0; 1061 } 1062 aa++; 1063 aj++; 1064 } 1065 ridx++; 1066 } 1067 } else { /* do not use compressed row format */ 1068 m = l->B->rmap->n; 1069 for (i=0; i<m; i++) { 1070 n = ii[i+1] - ii[i]; 1071 aj = aij->j + ii[i]; 1072 aa = aij->a + ii[i]; 1073 for (j=0; j<n; j++) { 1074 if (PetscAbsScalar(mask[*aj])) { 1075 if (b) bb[i] -= *aa*xx[*aj]; 1076 *aa = 0.0; 1077 } 1078 aa++; 1079 aj++; 1080 } 1081 } 1082 } 1083 if (x && b) { 1084 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1085 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1086 } 1087 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1088 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1089 ierr = PetscFree(lrows);CHKERRQ(ierr); 1090 1091 /* only change matrix nonzero state if pattern was allowed to be changed */ 1092 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1093 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1094 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1095 } 1096 PetscFunctionReturn(0); 1097 } 1098 1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1100 { 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 PetscErrorCode ierr; 1103 PetscInt nt; 1104 VecScatter Mvctx = a->Mvctx; 1105 1106 PetscFunctionBegin; 1107 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1108 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1109 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1111 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 1121 PetscFunctionBegin; 1122 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1127 { 1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 VecScatter Mvctx = a->Mvctx; 1131 1132 PetscFunctionBegin; 1133 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1134 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1135 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1137 PetscFunctionReturn(0); 1138 } 1139 1140 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1141 { 1142 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1143 PetscErrorCode ierr; 1144 1145 PetscFunctionBegin; 1146 /* do nondiagonal part */ 1147 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1148 /* do local part */ 1149 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1150 /* add partial results together */ 1151 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1152 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1157 { 1158 MPI_Comm comm; 1159 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1160 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1161 IS Me,Notme; 1162 PetscErrorCode ierr; 1163 PetscInt M,N,first,last,*notme,i; 1164 PetscBool lf; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 /* Easy test: symmetric diagonal block */ 1169 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1170 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1171 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1199 { 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1208 { 1209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 /* do nondiagonal part */ 1214 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1215 /* do local part */ 1216 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1217 /* add partial results together */ 1218 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1219 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* 1224 This only works correctly for square matrices where the subblock A->A is the 1225 diagonal block 1226 */ 1227 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1228 { 1229 PetscErrorCode ierr; 1230 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1231 1232 PetscFunctionBegin; 1233 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1234 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1235 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1240 { 1241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1242 PetscErrorCode ierr; 1243 1244 PetscFunctionBegin; 1245 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1246 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1251 { 1252 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 #if defined(PETSC_USE_LOG) 1257 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1258 #endif 1259 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1260 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1261 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1263 #if defined(PETSC_USE_CTABLE) 1264 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1265 #else 1266 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1267 #endif 1268 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1269 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1270 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1271 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1272 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1273 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1274 1275 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1276 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1277 1278 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1288 #if defined(PETSC_HAVE_CUDA) 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1290 #endif 1291 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr); 1293 #endif 1294 #if defined(PETSC_HAVE_ELEMENTAL) 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1296 #endif 1297 #if defined(PETSC_HAVE_SCALAPACK) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr); 1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr); 1310 #if defined(PETSC_HAVE_MKL_SPARSE) 1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr); 1312 #endif 1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr); 1314 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1320 { 1321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1322 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1323 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1324 const PetscInt *garray = aij->garray; 1325 const PetscScalar *aa,*ba; 1326 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1327 PetscInt *rowlens; 1328 PetscInt *colidxs; 1329 PetscScalar *matvals; 1330 PetscErrorCode ierr; 1331 1332 PetscFunctionBegin; 1333 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1334 1335 M = mat->rmap->N; 1336 N = mat->cmap->N; 1337 m = mat->rmap->n; 1338 rs = mat->rmap->rstart; 1339 cs = mat->cmap->rstart; 1340 nz = A->nz + B->nz; 1341 1342 /* write matrix header */ 1343 header[0] = MAT_FILE_CLASSID; 1344 header[1] = M; header[2] = N; header[3] = nz; 1345 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1346 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1347 1348 /* fill in and store row lengths */ 1349 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1350 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1351 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1352 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1353 1354 /* fill in and store column indices */ 1355 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1356 for (cnt=0, i=0; i<m; i++) { 1357 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1358 if (garray[B->j[jb]] > cs) break; 1359 colidxs[cnt++] = garray[B->j[jb]]; 1360 } 1361 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1362 colidxs[cnt++] = A->j[ja] + cs; 1363 for (; jb<B->i[i+1]; jb++) 1364 colidxs[cnt++] = garray[B->j[jb]]; 1365 } 1366 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1367 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1368 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1369 1370 /* fill in and store nonzero values */ 1371 ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr); 1372 ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr); 1373 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1374 for (cnt=0, i=0; i<m; i++) { 1375 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1376 if (garray[B->j[jb]] > cs) break; 1377 matvals[cnt++] = ba[jb]; 1378 } 1379 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1380 matvals[cnt++] = aa[ja]; 1381 for (; jb<B->i[i+1]; jb++) 1382 matvals[cnt++] = ba[jb]; 1383 } 1384 ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr); 1385 ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr); 1386 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1387 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1388 ierr = PetscFree(matvals);CHKERRQ(ierr); 1389 1390 /* write block size option to the viewer's .info file */ 1391 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #include <petscdraw.h> 1396 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1397 { 1398 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1399 PetscErrorCode ierr; 1400 PetscMPIInt rank = aij->rank,size = aij->size; 1401 PetscBool isdraw,iascii,isbinary; 1402 PetscViewer sviewer; 1403 PetscViewerFormat format; 1404 1405 PetscFunctionBegin; 1406 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1407 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1408 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1409 if (iascii) { 1410 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1411 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1412 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1413 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1414 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1415 for (i=0; i<(PetscInt)size; i++) { 1416 nmax = PetscMax(nmax,nz[i]); 1417 nmin = PetscMin(nmin,nz[i]); 1418 navg += nz[i]; 1419 } 1420 ierr = PetscFree(nz);CHKERRQ(ierr); 1421 navg = navg/size; 1422 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1426 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1427 MatInfo info; 1428 PetscBool inodes; 1429 1430 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 1431 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1432 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1434 if (!inodes) { 1435 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1436 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1437 } else { 1438 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1439 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1440 } 1441 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1442 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1443 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1444 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1445 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1446 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1447 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1448 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1451 PetscInt inodecount,inodelimit,*inodes; 1452 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1453 if (inodes) { 1454 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1455 } else { 1456 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1457 } 1458 PetscFunctionReturn(0); 1459 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1460 PetscFunctionReturn(0); 1461 } 1462 } else if (isbinary) { 1463 if (size == 1) { 1464 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1465 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1466 } else { 1467 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1468 } 1469 PetscFunctionReturn(0); 1470 } else if (iascii && size == 1) { 1471 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1472 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } else if (isdraw) { 1475 PetscDraw draw; 1476 PetscBool isnull; 1477 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1478 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1479 if (isnull) PetscFunctionReturn(0); 1480 } 1481 1482 { /* assemble the entire matrix onto first processor */ 1483 Mat A = NULL, Av; 1484 IS isrow,iscol; 1485 1486 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1487 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1488 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1489 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1490 /* The commented code uses MatCreateSubMatrices instead */ 1491 /* 1492 Mat *AA, A = NULL, Av; 1493 IS isrow,iscol; 1494 1495 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1496 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1497 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1498 if (!rank) { 1499 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1500 A = AA[0]; 1501 Av = AA[0]; 1502 } 1503 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1504 */ 1505 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1506 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1507 /* 1508 Everyone has to call to draw the matrix since the graphics waits are 1509 synchronized across all processors that share the PetscDraw object 1510 */ 1511 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1512 if (!rank) { 1513 if (((PetscObject)mat)->name) { 1514 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1515 } 1516 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1517 } 1518 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1519 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1520 ierr = MatDestroy(&A);CHKERRQ(ierr); 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1526 { 1527 PetscErrorCode ierr; 1528 PetscBool iascii,isdraw,issocket,isbinary; 1529 1530 PetscFunctionBegin; 1531 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1532 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1533 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1534 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1535 if (iascii || isdraw || isbinary || issocket) { 1536 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1537 } 1538 PetscFunctionReturn(0); 1539 } 1540 1541 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1542 { 1543 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1544 PetscErrorCode ierr; 1545 Vec bb1 = NULL; 1546 PetscBool hasop; 1547 1548 PetscFunctionBegin; 1549 if (flag == SOR_APPLY_UPPER) { 1550 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1551 PetscFunctionReturn(0); 1552 } 1553 1554 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1555 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1556 } 1557 1558 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1559 if (flag & SOR_ZERO_INITIAL_GUESS) { 1560 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1561 its--; 1562 } 1563 1564 while (its--) { 1565 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 1568 /* update rhs: bb1 = bb - B*x */ 1569 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1570 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1571 1572 /* local sweep */ 1573 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1574 } 1575 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1576 if (flag & SOR_ZERO_INITIAL_GUESS) { 1577 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1578 its--; 1579 } 1580 while (its--) { 1581 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 1584 /* update rhs: bb1 = bb - B*x */ 1585 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1586 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1587 1588 /* local sweep */ 1589 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1590 } 1591 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1592 if (flag & SOR_ZERO_INITIAL_GUESS) { 1593 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1594 its--; 1595 } 1596 while (its--) { 1597 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 1600 /* update rhs: bb1 = bb - B*x */ 1601 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1602 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1603 1604 /* local sweep */ 1605 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1606 } 1607 } else if (flag & SOR_EISENSTAT) { 1608 Vec xx1; 1609 1610 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1611 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1612 1613 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1615 if (!mat->diag) { 1616 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1617 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1618 } 1619 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1620 if (hasop) { 1621 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1622 } else { 1623 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1624 } 1625 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1626 1627 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1628 1629 /* local sweep */ 1630 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1631 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1632 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1633 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1634 1635 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1636 1637 matin->factorerrortype = mat->A->factorerrortype; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1642 { 1643 Mat aA,aB,Aperm; 1644 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1645 PetscScalar *aa,*ba; 1646 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1647 PetscSF rowsf,sf; 1648 IS parcolp = NULL; 1649 PetscBool done; 1650 PetscErrorCode ierr; 1651 1652 PetscFunctionBegin; 1653 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1654 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1655 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1656 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1657 1658 /* Invert row permutation to find out where my rows should go */ 1659 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1660 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1661 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1662 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1663 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1665 1666 /* Invert column permutation to find out where my columns should go */ 1667 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1668 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1669 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1670 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1671 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1672 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1676 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1677 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1678 1679 /* Find out where my gcols should go */ 1680 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1681 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1682 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1683 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1684 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1685 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1686 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1687 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1688 1689 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1690 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1691 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1692 for (i=0; i<m; i++) { 1693 PetscInt row = rdest[i]; 1694 PetscMPIInt rowner; 1695 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1696 for (j=ai[i]; j<ai[i+1]; j++) { 1697 PetscInt col = cdest[aj[j]]; 1698 PetscMPIInt cowner; 1699 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1700 if (rowner == cowner) dnnz[i]++; 1701 else onnz[i]++; 1702 } 1703 for (j=bi[i]; j<bi[i+1]; j++) { 1704 PetscInt col = gcdest[bj[j]]; 1705 PetscMPIInt cowner; 1706 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1707 if (rowner == cowner) dnnz[i]++; 1708 else onnz[i]++; 1709 } 1710 } 1711 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1712 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1713 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1714 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1715 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1716 1717 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1718 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1719 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1720 for (i=0; i<m; i++) { 1721 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1722 PetscInt j0,rowlen; 1723 rowlen = ai[i+1] - ai[i]; 1724 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1725 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1726 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1727 } 1728 rowlen = bi[i+1] - bi[i]; 1729 for (j0=j=0; j<rowlen; j0=j) { 1730 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1731 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1732 } 1733 } 1734 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1735 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1736 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1737 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1738 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1739 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1740 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1741 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1742 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1743 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1744 *B = Aperm; 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1749 { 1750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1751 PetscErrorCode ierr; 1752 1753 PetscFunctionBegin; 1754 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1755 if (ghosts) *ghosts = aij->garray; 1756 PetscFunctionReturn(0); 1757 } 1758 1759 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1760 { 1761 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1762 Mat A = mat->A,B = mat->B; 1763 PetscErrorCode ierr; 1764 PetscLogDouble isend[5],irecv[5]; 1765 1766 PetscFunctionBegin; 1767 info->block_size = 1.0; 1768 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1769 1770 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1771 isend[3] = info->memory; isend[4] = info->mallocs; 1772 1773 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1774 1775 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1776 isend[3] += info->memory; isend[4] += info->mallocs; 1777 if (flag == MAT_LOCAL) { 1778 info->nz_used = isend[0]; 1779 info->nz_allocated = isend[1]; 1780 info->nz_unneeded = isend[2]; 1781 info->memory = isend[3]; 1782 info->mallocs = isend[4]; 1783 } else if (flag == MAT_GLOBAL_MAX) { 1784 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1785 1786 info->nz_used = irecv[0]; 1787 info->nz_allocated = irecv[1]; 1788 info->nz_unneeded = irecv[2]; 1789 info->memory = irecv[3]; 1790 info->mallocs = irecv[4]; 1791 } else if (flag == MAT_GLOBAL_SUM) { 1792 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1793 1794 info->nz_used = irecv[0]; 1795 info->nz_allocated = irecv[1]; 1796 info->nz_unneeded = irecv[2]; 1797 info->memory = irecv[3]; 1798 info->mallocs = irecv[4]; 1799 } 1800 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1801 info->fill_ratio_needed = 0; 1802 info->factor_mallocs = 0; 1803 PetscFunctionReturn(0); 1804 } 1805 1806 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1807 { 1808 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1809 PetscErrorCode ierr; 1810 1811 PetscFunctionBegin; 1812 switch (op) { 1813 case MAT_NEW_NONZERO_LOCATIONS: 1814 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1815 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1816 case MAT_KEEP_NONZERO_PATTERN: 1817 case MAT_NEW_NONZERO_LOCATION_ERR: 1818 case MAT_USE_INODES: 1819 case MAT_IGNORE_ZERO_ENTRIES: 1820 MatCheckPreallocated(A,1); 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1823 break; 1824 case MAT_ROW_ORIENTED: 1825 MatCheckPreallocated(A,1); 1826 a->roworiented = flg; 1827 1828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1829 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1830 break; 1831 case MAT_FORCE_DIAGONAL_ENTRIES: 1832 case MAT_SORTED_FULL: 1833 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1834 break; 1835 case MAT_IGNORE_OFF_PROC_ENTRIES: 1836 a->donotstash = flg; 1837 break; 1838 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1839 case MAT_SPD: 1840 case MAT_SYMMETRIC: 1841 case MAT_STRUCTURALLY_SYMMETRIC: 1842 case MAT_HERMITIAN: 1843 case MAT_SYMMETRY_ETERNAL: 1844 break; 1845 case MAT_SUBMAT_SINGLEIS: 1846 A->submat_singleis = flg; 1847 break; 1848 case MAT_STRUCTURE_ONLY: 1849 /* The option is handled directly by MatSetOption() */ 1850 break; 1851 default: 1852 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1853 } 1854 PetscFunctionReturn(0); 1855 } 1856 1857 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1858 { 1859 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1860 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1861 PetscErrorCode ierr; 1862 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1863 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1864 PetscInt *cmap,*idx_p; 1865 1866 PetscFunctionBegin; 1867 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1868 mat->getrowactive = PETSC_TRUE; 1869 1870 if (!mat->rowvalues && (idx || v)) { 1871 /* 1872 allocate enough space to hold information from the longest row. 1873 */ 1874 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1875 PetscInt max = 1,tmp; 1876 for (i=0; i<matin->rmap->n; i++) { 1877 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1878 if (max < tmp) max = tmp; 1879 } 1880 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1881 } 1882 1883 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1884 lrow = row - rstart; 1885 1886 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1887 if (!v) {pvA = NULL; pvB = NULL;} 1888 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1889 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1890 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1891 nztot = nzA + nzB; 1892 1893 cmap = mat->garray; 1894 if (v || idx) { 1895 if (nztot) { 1896 /* Sort by increasing column numbers, assuming A and B already sorted */ 1897 PetscInt imark = -1; 1898 if (v) { 1899 *v = v_p = mat->rowvalues; 1900 for (i=0; i<nzB; i++) { 1901 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1902 else break; 1903 } 1904 imark = i; 1905 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1906 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1907 } 1908 if (idx) { 1909 *idx = idx_p = mat->rowindices; 1910 if (imark > -1) { 1911 for (i=0; i<imark; i++) { 1912 idx_p[i] = cmap[cworkB[i]]; 1913 } 1914 } else { 1915 for (i=0; i<nzB; i++) { 1916 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1917 else break; 1918 } 1919 imark = i; 1920 } 1921 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1922 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1923 } 1924 } else { 1925 if (idx) *idx = NULL; 1926 if (v) *v = NULL; 1927 } 1928 } 1929 *nz = nztot; 1930 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1931 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1936 { 1937 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1938 1939 PetscFunctionBegin; 1940 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1941 aij->getrowactive = PETSC_FALSE; 1942 PetscFunctionReturn(0); 1943 } 1944 1945 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1946 { 1947 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1948 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1949 PetscErrorCode ierr; 1950 PetscInt i,j,cstart = mat->cmap->rstart; 1951 PetscReal sum = 0.0; 1952 MatScalar *v; 1953 1954 PetscFunctionBegin; 1955 if (aij->size == 1) { 1956 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1957 } else { 1958 if (type == NORM_FROBENIUS) { 1959 v = amat->a; 1960 for (i=0; i<amat->nz; i++) { 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 } 1963 v = bmat->a; 1964 for (i=0; i<bmat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1968 *norm = PetscSqrtReal(*norm); 1969 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1970 } else if (type == NORM_1) { /* max column norm */ 1971 PetscReal *tmp,*tmp2; 1972 PetscInt *jj,*garray = aij->garray; 1973 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1974 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1975 *norm = 0.0; 1976 v = amat->a; jj = amat->j; 1977 for (j=0; j<amat->nz; j++) { 1978 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1979 } 1980 v = bmat->a; jj = bmat->j; 1981 for (j=0; j<bmat->nz; j++) { 1982 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1983 } 1984 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1985 for (j=0; j<mat->cmap->N; j++) { 1986 if (tmp2[j] > *norm) *norm = tmp2[j]; 1987 } 1988 ierr = PetscFree(tmp);CHKERRQ(ierr); 1989 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1990 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1991 } else if (type == NORM_INFINITY) { /* max row norm */ 1992 PetscReal ntemp = 0.0; 1993 for (j=0; j<aij->A->rmap->n; j++) { 1994 v = amat->a + amat->i[j]; 1995 sum = 0.0; 1996 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1997 sum += PetscAbsScalar(*v); v++; 1998 } 1999 v = bmat->a + bmat->i[j]; 2000 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 if (sum > ntemp) ntemp = sum; 2004 } 2005 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2006 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2007 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2013 { 2014 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2015 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2016 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2017 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2018 PetscErrorCode ierr; 2019 Mat B,A_diag,*B_diag; 2020 const MatScalar *array; 2021 2022 PetscFunctionBegin; 2023 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2024 ai = Aloc->i; aj = Aloc->j; 2025 bi = Bloc->i; bj = Bloc->j; 2026 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2027 PetscInt *d_nnz,*g_nnz,*o_nnz; 2028 PetscSFNode *oloc; 2029 PETSC_UNUSED PetscSF sf; 2030 2031 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2032 /* compute d_nnz for preallocation */ 2033 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2034 for (i=0; i<ai[ma]; i++) { 2035 d_nnz[aj[i]]++; 2036 } 2037 /* compute local off-diagonal contributions */ 2038 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2039 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2040 /* map those to global */ 2041 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2042 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2043 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2044 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2045 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2046 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2047 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2048 2049 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2050 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2051 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2052 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2053 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2054 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2055 } else { 2056 B = *matout; 2057 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2058 } 2059 2060 b = (Mat_MPIAIJ*)B->data; 2061 A_diag = a->A; 2062 B_diag = &b->A; 2063 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2064 A_diag_ncol = A_diag->cmap->N; 2065 B_diag_ilen = sub_B_diag->ilen; 2066 B_diag_i = sub_B_diag->i; 2067 2068 /* Set ilen for diagonal of B */ 2069 for (i=0; i<A_diag_ncol; i++) { 2070 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2071 } 2072 2073 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2074 very quickly (=without using MatSetValues), because all writes are local. */ 2075 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2076 2077 /* copy over the B part */ 2078 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2079 array = Bloc->a; 2080 row = A->rmap->rstart; 2081 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2082 cols_tmp = cols; 2083 for (i=0; i<mb; i++) { 2084 ncol = bi[i+1]-bi[i]; 2085 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2086 row++; 2087 array += ncol; cols_tmp += ncol; 2088 } 2089 ierr = PetscFree(cols);CHKERRQ(ierr); 2090 2091 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2093 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2094 *matout = B; 2095 } else { 2096 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2097 } 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2102 { 2103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2104 Mat a = aij->A,b = aij->B; 2105 PetscErrorCode ierr; 2106 PetscInt s1,s2,s3; 2107 2108 PetscFunctionBegin; 2109 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2110 if (rr) { 2111 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2112 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2113 /* Overlap communication with computation. */ 2114 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 } 2116 if (ll) { 2117 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2118 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2119 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2120 } 2121 /* scale the diagonal block */ 2122 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2123 2124 if (rr) { 2125 /* Do a scatter end and then right scale the off-diagonal block */ 2126 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2127 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2128 } 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2143 { 2144 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2145 Mat a,b,c,d; 2146 PetscBool flg; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 a = matA->A; b = matA->B; 2151 c = matB->A; d = matB->B; 2152 2153 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2154 if (flg) { 2155 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2156 } 2157 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2162 { 2163 PetscErrorCode ierr; 2164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2165 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2166 2167 PetscFunctionBegin; 2168 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2169 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2170 /* because of the column compression in the off-processor part of the matrix a->B, 2171 the number of columns in a->B and b->B may be different, hence we cannot call 2172 the MatCopy() directly on the two parts. If need be, we can provide a more 2173 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2174 then copying the submatrices */ 2175 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2176 } else { 2177 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2178 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2179 } 2180 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2185 { 2186 PetscErrorCode ierr; 2187 2188 PetscFunctionBegin; 2189 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /* 2194 Computes the number of nonzeros per row needed for preallocation when X and Y 2195 have different nonzero structure. 2196 */ 2197 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2198 { 2199 PetscInt i,j,k,nzx,nzy; 2200 2201 PetscFunctionBegin; 2202 /* Set the number of nonzeros in the new matrix */ 2203 for (i=0; i<m; i++) { 2204 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2205 nzx = xi[i+1] - xi[i]; 2206 nzy = yi[i+1] - yi[i]; 2207 nnz[i] = 0; 2208 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2209 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2210 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2211 nnz[i]++; 2212 } 2213 for (; k<nzy; k++) nnz[i]++; 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2219 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2220 { 2221 PetscErrorCode ierr; 2222 PetscInt m = Y->rmap->N; 2223 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2224 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2225 2226 PetscFunctionBegin; 2227 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2232 { 2233 PetscErrorCode ierr; 2234 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2235 PetscBLASInt bnz,one=1; 2236 Mat_SeqAIJ *x,*y; 2237 2238 PetscFunctionBegin; 2239 if (str == SAME_NONZERO_PATTERN) { 2240 PetscScalar alpha = a; 2241 x = (Mat_SeqAIJ*)xx->A->data; 2242 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2243 y = (Mat_SeqAIJ*)yy->A->data; 2244 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2245 x = (Mat_SeqAIJ*)xx->B->data; 2246 y = (Mat_SeqAIJ*)yy->B->data; 2247 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2248 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2249 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2250 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2251 will be updated */ 2252 #if defined(PETSC_HAVE_DEVICE) 2253 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2254 Y->offloadmask = PETSC_OFFLOAD_CPU; 2255 } 2256 #endif 2257 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2258 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2259 } else { 2260 Mat B; 2261 PetscInt *nnz_d,*nnz_o; 2262 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2263 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2264 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2265 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2266 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2267 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2268 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2269 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2270 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2271 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2272 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2273 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2274 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2275 } 2276 PetscFunctionReturn(0); 2277 } 2278 2279 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2280 2281 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2282 { 2283 #if defined(PETSC_USE_COMPLEX) 2284 PetscErrorCode ierr; 2285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2286 2287 PetscFunctionBegin; 2288 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2289 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2290 #else 2291 PetscFunctionBegin; 2292 #endif 2293 PetscFunctionReturn(0); 2294 } 2295 2296 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2303 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2308 { 2309 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2314 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2319 { 2320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2321 PetscErrorCode ierr; 2322 PetscInt i,*idxb = NULL,m = A->rmap->n; 2323 PetscScalar *va,*vv; 2324 Vec vB,vA; 2325 const PetscScalar *vb; 2326 2327 PetscFunctionBegin; 2328 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2329 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2330 2331 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2332 if (idx) { 2333 for (i=0; i<m; i++) { 2334 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2335 } 2336 } 2337 2338 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2339 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2340 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2341 2342 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2343 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2344 for (i=0; i<m; i++) { 2345 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2346 vv[i] = vb[i]; 2347 if (idx) idx[i] = a->garray[idxb[i]]; 2348 } else { 2349 vv[i] = va[i]; 2350 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2351 idx[i] = a->garray[idxb[i]]; 2352 } 2353 } 2354 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2355 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2356 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 2357 ierr = PetscFree(idxb);CHKERRQ(ierr); 2358 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2359 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2360 PetscFunctionReturn(0); 2361 } 2362 2363 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2364 { 2365 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2366 PetscInt m = A->rmap->n,n = A->cmap->n; 2367 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2368 PetscInt *cmap = mat->garray; 2369 PetscInt *diagIdx, *offdiagIdx; 2370 Vec diagV, offdiagV; 2371 PetscScalar *a, *diagA, *offdiagA, *ba; 2372 PetscInt r,j,col,ncols,*bi,*bj; 2373 PetscErrorCode ierr; 2374 Mat B = mat->B; 2375 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2376 2377 PetscFunctionBegin; 2378 /* When a process holds entire A and other processes have no entry */ 2379 if (A->cmap->N == n) { 2380 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2381 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2382 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2383 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2384 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } else if (n == 0) { 2387 if (m) { 2388 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2389 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2390 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2391 } 2392 PetscFunctionReturn(0); 2393 } 2394 2395 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2396 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2397 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2398 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2399 2400 /* Get offdiagIdx[] for implicit 0.0 */ 2401 ba = b->a; 2402 bi = b->i; 2403 bj = b->j; 2404 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2405 for (r = 0; r < m; r++) { 2406 ncols = bi[r+1] - bi[r]; 2407 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2408 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2409 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2410 offdiagA[r] = 0.0; 2411 2412 /* Find first hole in the cmap */ 2413 for (j=0; j<ncols; j++) { 2414 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2415 if (col > j && j < cstart) { 2416 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2417 break; 2418 } else if (col > j + n && j >= cstart) { 2419 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2420 break; 2421 } 2422 } 2423 if (j == ncols && ncols < A->cmap->N - n) { 2424 /* a hole is outside compressed Bcols */ 2425 if (ncols == 0) { 2426 if (cstart) { 2427 offdiagIdx[r] = 0; 2428 } else offdiagIdx[r] = cend; 2429 } else { /* ncols > 0 */ 2430 offdiagIdx[r] = cmap[ncols-1] + 1; 2431 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2432 } 2433 } 2434 } 2435 2436 for (j=0; j<ncols; j++) { 2437 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2438 ba++; bj++; 2439 } 2440 } 2441 2442 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2443 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2444 for (r = 0; r < m; ++r) { 2445 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2446 a[r] = diagA[r]; 2447 if (idx) idx[r] = cstart + diagIdx[r]; 2448 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2449 a[r] = diagA[r]; 2450 if (idx) { 2451 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2452 idx[r] = cstart + diagIdx[r]; 2453 } else idx[r] = offdiagIdx[r]; 2454 } 2455 } else { 2456 a[r] = offdiagA[r]; 2457 if (idx) idx[r] = offdiagIdx[r]; 2458 } 2459 } 2460 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2461 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2462 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2463 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2464 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2465 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2470 { 2471 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2472 PetscInt m = A->rmap->n,n = A->cmap->n; 2473 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2474 PetscInt *cmap = mat->garray; 2475 PetscInt *diagIdx, *offdiagIdx; 2476 Vec diagV, offdiagV; 2477 PetscScalar *a, *diagA, *offdiagA, *ba; 2478 PetscInt r,j,col,ncols,*bi,*bj; 2479 PetscErrorCode ierr; 2480 Mat B = mat->B; 2481 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2482 2483 PetscFunctionBegin; 2484 /* When a process holds entire A and other processes have no entry */ 2485 if (A->cmap->N == n) { 2486 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2487 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2488 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2489 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2490 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } else if (n == 0) { 2493 if (m) { 2494 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2495 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2496 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2502 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2503 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2504 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2505 2506 /* Get offdiagIdx[] for implicit 0.0 */ 2507 ba = b->a; 2508 bi = b->i; 2509 bj = b->j; 2510 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2511 for (r = 0; r < m; r++) { 2512 ncols = bi[r+1] - bi[r]; 2513 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2514 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2515 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2516 offdiagA[r] = 0.0; 2517 2518 /* Find first hole in the cmap */ 2519 for (j=0; j<ncols; j++) { 2520 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2521 if (col > j && j < cstart) { 2522 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2523 break; 2524 } else if (col > j + n && j >= cstart) { 2525 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2526 break; 2527 } 2528 } 2529 if (j == ncols && ncols < A->cmap->N - n) { 2530 /* a hole is outside compressed Bcols */ 2531 if (ncols == 0) { 2532 if (cstart) { 2533 offdiagIdx[r] = 0; 2534 } else offdiagIdx[r] = cend; 2535 } else { /* ncols > 0 */ 2536 offdiagIdx[r] = cmap[ncols-1] + 1; 2537 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2538 } 2539 } 2540 } 2541 2542 for (j=0; j<ncols; j++) { 2543 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2544 ba++; bj++; 2545 } 2546 } 2547 2548 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2549 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2550 for (r = 0; r < m; ++r) { 2551 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2552 a[r] = diagA[r]; 2553 if (idx) idx[r] = cstart + diagIdx[r]; 2554 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) { 2557 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2558 idx[r] = cstart + diagIdx[r]; 2559 } else idx[r] = offdiagIdx[r]; 2560 } 2561 } else { 2562 a[r] = offdiagA[r]; 2563 if (idx) idx[r] = offdiagIdx[r]; 2564 } 2565 } 2566 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2567 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2568 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2569 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2570 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2571 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2576 { 2577 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2578 PetscInt m = A->rmap->n,n = A->cmap->n; 2579 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2580 PetscInt *cmap = mat->garray; 2581 PetscInt *diagIdx, *offdiagIdx; 2582 Vec diagV, offdiagV; 2583 PetscScalar *a, *diagA, *offdiagA, *ba; 2584 PetscInt r,j,col,ncols,*bi,*bj; 2585 PetscErrorCode ierr; 2586 Mat B = mat->B; 2587 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2588 2589 PetscFunctionBegin; 2590 /* When a process holds entire A and other processes have no entry */ 2591 if (A->cmap->N == n) { 2592 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2593 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2594 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2595 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2596 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2597 PetscFunctionReturn(0); 2598 } else if (n == 0) { 2599 if (m) { 2600 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2601 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2602 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2603 } 2604 PetscFunctionReturn(0); 2605 } 2606 2607 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2608 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2609 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2610 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2611 2612 /* Get offdiagIdx[] for implicit 0.0 */ 2613 ba = b->a; 2614 bi = b->i; 2615 bj = b->j; 2616 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2617 for (r = 0; r < m; r++) { 2618 ncols = bi[r+1] - bi[r]; 2619 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2620 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2621 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2622 offdiagA[r] = 0.0; 2623 2624 /* Find first hole in the cmap */ 2625 for (j=0; j<ncols; j++) { 2626 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2627 if (col > j && j < cstart) { 2628 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2629 break; 2630 } else if (col > j + n && j >= cstart) { 2631 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2632 break; 2633 } 2634 } 2635 if (j == ncols && ncols < A->cmap->N - n) { 2636 /* a hole is outside compressed Bcols */ 2637 if (ncols == 0) { 2638 if (cstart) { 2639 offdiagIdx[r] = 0; 2640 } else offdiagIdx[r] = cend; 2641 } else { /* ncols > 0 */ 2642 offdiagIdx[r] = cmap[ncols-1] + 1; 2643 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2644 } 2645 } 2646 } 2647 2648 for (j=0; j<ncols; j++) { 2649 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2650 ba++; bj++; 2651 } 2652 } 2653 2654 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2655 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2656 for (r = 0; r < m; ++r) { 2657 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2658 a[r] = diagA[r]; 2659 if (idx) idx[r] = cstart + diagIdx[r]; 2660 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2661 a[r] = diagA[r]; 2662 if (idx) { 2663 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2664 idx[r] = cstart + diagIdx[r]; 2665 } else idx[r] = offdiagIdx[r]; 2666 } 2667 } else { 2668 a[r] = offdiagA[r]; 2669 if (idx) idx[r] = offdiagIdx[r]; 2670 } 2671 } 2672 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2673 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2674 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2675 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2676 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2677 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2682 { 2683 PetscErrorCode ierr; 2684 Mat *dummy; 2685 2686 PetscFunctionBegin; 2687 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2688 *newmat = *dummy; 2689 ierr = PetscFree(dummy);CHKERRQ(ierr); 2690 PetscFunctionReturn(0); 2691 } 2692 2693 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2694 { 2695 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2700 A->factorerrortype = a->A->factorerrortype; 2701 PetscFunctionReturn(0); 2702 } 2703 2704 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2705 { 2706 PetscErrorCode ierr; 2707 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2708 2709 PetscFunctionBegin; 2710 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2711 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2712 if (x->assembled) { 2713 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2714 } else { 2715 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2716 } 2717 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2718 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2723 { 2724 PetscFunctionBegin; 2725 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2726 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2727 PetscFunctionReturn(0); 2728 } 2729 2730 /*@ 2731 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2732 2733 Collective on Mat 2734 2735 Input Parameters: 2736 + A - the matrix 2737 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2738 2739 Level: advanced 2740 2741 @*/ 2742 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2743 { 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2752 { 2753 PetscErrorCode ierr; 2754 PetscBool sc = PETSC_FALSE,flg; 2755 2756 PetscFunctionBegin; 2757 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2758 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2759 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2760 if (flg) { 2761 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2762 } 2763 ierr = PetscOptionsTail();CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2768 { 2769 PetscErrorCode ierr; 2770 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2771 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2772 2773 PetscFunctionBegin; 2774 if (!Y->preallocated) { 2775 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2776 } else if (!aij->nz) { 2777 PetscInt nonew = aij->nonew; 2778 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2779 aij->nonew = nonew; 2780 } 2781 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2782 PetscFunctionReturn(0); 2783 } 2784 2785 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2786 { 2787 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2788 PetscErrorCode ierr; 2789 2790 PetscFunctionBegin; 2791 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2792 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2793 if (d) { 2794 PetscInt rstart; 2795 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2796 *d += rstart; 2797 2798 } 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2803 { 2804 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2805 PetscErrorCode ierr; 2806 2807 PetscFunctionBegin; 2808 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2809 PetscFunctionReturn(0); 2810 } 2811 2812 /* -------------------------------------------------------------------*/ 2813 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2814 MatGetRow_MPIAIJ, 2815 MatRestoreRow_MPIAIJ, 2816 MatMult_MPIAIJ, 2817 /* 4*/ MatMultAdd_MPIAIJ, 2818 MatMultTranspose_MPIAIJ, 2819 MatMultTransposeAdd_MPIAIJ, 2820 NULL, 2821 NULL, 2822 NULL, 2823 /*10*/ NULL, 2824 NULL, 2825 NULL, 2826 MatSOR_MPIAIJ, 2827 MatTranspose_MPIAIJ, 2828 /*15*/ MatGetInfo_MPIAIJ, 2829 MatEqual_MPIAIJ, 2830 MatGetDiagonal_MPIAIJ, 2831 MatDiagonalScale_MPIAIJ, 2832 MatNorm_MPIAIJ, 2833 /*20*/ MatAssemblyBegin_MPIAIJ, 2834 MatAssemblyEnd_MPIAIJ, 2835 MatSetOption_MPIAIJ, 2836 MatZeroEntries_MPIAIJ, 2837 /*24*/ MatZeroRows_MPIAIJ, 2838 NULL, 2839 NULL, 2840 NULL, 2841 NULL, 2842 /*29*/ MatSetUp_MPIAIJ, 2843 NULL, 2844 NULL, 2845 MatGetDiagonalBlock_MPIAIJ, 2846 NULL, 2847 /*34*/ MatDuplicate_MPIAIJ, 2848 NULL, 2849 NULL, 2850 NULL, 2851 NULL, 2852 /*39*/ MatAXPY_MPIAIJ, 2853 MatCreateSubMatrices_MPIAIJ, 2854 MatIncreaseOverlap_MPIAIJ, 2855 MatGetValues_MPIAIJ, 2856 MatCopy_MPIAIJ, 2857 /*44*/ MatGetRowMax_MPIAIJ, 2858 MatScale_MPIAIJ, 2859 MatShift_MPIAIJ, 2860 MatDiagonalSet_MPIAIJ, 2861 MatZeroRowsColumns_MPIAIJ, 2862 /*49*/ MatSetRandom_MPIAIJ, 2863 NULL, 2864 NULL, 2865 NULL, 2866 NULL, 2867 /*54*/ MatFDColoringCreate_MPIXAIJ, 2868 NULL, 2869 MatSetUnfactored_MPIAIJ, 2870 MatPermute_MPIAIJ, 2871 NULL, 2872 /*59*/ MatCreateSubMatrix_MPIAIJ, 2873 MatDestroy_MPIAIJ, 2874 MatView_MPIAIJ, 2875 NULL, 2876 NULL, 2877 /*64*/ NULL, 2878 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2879 NULL, 2880 NULL, 2881 NULL, 2882 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2883 MatGetRowMinAbs_MPIAIJ, 2884 NULL, 2885 NULL, 2886 NULL, 2887 NULL, 2888 /*75*/ MatFDColoringApply_AIJ, 2889 MatSetFromOptions_MPIAIJ, 2890 NULL, 2891 NULL, 2892 MatFindZeroDiagonals_MPIAIJ, 2893 /*80*/ NULL, 2894 NULL, 2895 NULL, 2896 /*83*/ MatLoad_MPIAIJ, 2897 MatIsSymmetric_MPIAIJ, 2898 NULL, 2899 NULL, 2900 NULL, 2901 NULL, 2902 /*89*/ NULL, 2903 NULL, 2904 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2905 NULL, 2906 NULL, 2907 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2908 NULL, 2909 NULL, 2910 NULL, 2911 MatBindToCPU_MPIAIJ, 2912 /*99*/ MatProductSetFromOptions_MPIAIJ, 2913 NULL, 2914 NULL, 2915 MatConjugate_MPIAIJ, 2916 NULL, 2917 /*104*/MatSetValuesRow_MPIAIJ, 2918 MatRealPart_MPIAIJ, 2919 MatImaginaryPart_MPIAIJ, 2920 NULL, 2921 NULL, 2922 /*109*/NULL, 2923 NULL, 2924 MatGetRowMin_MPIAIJ, 2925 NULL, 2926 MatMissingDiagonal_MPIAIJ, 2927 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2928 NULL, 2929 MatGetGhosts_MPIAIJ, 2930 NULL, 2931 NULL, 2932 /*119*/MatMultDiagonalBlock_MPIAIJ, 2933 NULL, 2934 NULL, 2935 NULL, 2936 MatGetMultiProcBlock_MPIAIJ, 2937 /*124*/MatFindNonzeroRows_MPIAIJ, 2938 MatGetColumnNorms_MPIAIJ, 2939 MatInvertBlockDiagonal_MPIAIJ, 2940 MatInvertVariableBlockDiagonal_MPIAIJ, 2941 MatCreateSubMatricesMPI_MPIAIJ, 2942 /*129*/NULL, 2943 NULL, 2944 NULL, 2945 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2946 NULL, 2947 /*134*/NULL, 2948 NULL, 2949 NULL, 2950 NULL, 2951 NULL, 2952 /*139*/MatSetBlockSizes_MPIAIJ, 2953 NULL, 2954 NULL, 2955 MatFDColoringSetUp_MPIXAIJ, 2956 MatFindOffBlockDiagonalEntries_MPIAIJ, 2957 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2958 /*145*/NULL, 2959 NULL, 2960 NULL 2961 }; 2962 2963 /* ----------------------------------------------------------------------------------------*/ 2964 2965 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2966 { 2967 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2968 PetscErrorCode ierr; 2969 2970 PetscFunctionBegin; 2971 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2972 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 2976 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2977 { 2978 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2983 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2988 { 2989 Mat_MPIAIJ *b; 2990 PetscErrorCode ierr; 2991 PetscMPIInt size; 2992 2993 PetscFunctionBegin; 2994 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2995 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2996 b = (Mat_MPIAIJ*)B->data; 2997 2998 #if defined(PETSC_USE_CTABLE) 2999 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3000 #else 3001 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3002 #endif 3003 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3004 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3005 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3006 3007 /* Because the B will have been resized we simply destroy it and create a new one each time */ 3008 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3009 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3010 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3011 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 3012 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3013 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3014 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3015 3016 if (!B->preallocated) { 3017 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3018 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3019 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3020 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3021 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3022 } 3023 3024 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3025 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3026 B->preallocated = PETSC_TRUE; 3027 B->was_assembled = PETSC_FALSE; 3028 B->assembled = PETSC_FALSE; 3029 PetscFunctionReturn(0); 3030 } 3031 3032 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3033 { 3034 Mat_MPIAIJ *b; 3035 PetscErrorCode ierr; 3036 3037 PetscFunctionBegin; 3038 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3039 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3040 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3041 b = (Mat_MPIAIJ*)B->data; 3042 3043 #if defined(PETSC_USE_CTABLE) 3044 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3045 #else 3046 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3047 #endif 3048 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3049 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3050 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3051 3052 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3053 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3054 B->preallocated = PETSC_TRUE; 3055 B->was_assembled = PETSC_FALSE; 3056 B->assembled = PETSC_FALSE; 3057 PetscFunctionReturn(0); 3058 } 3059 3060 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3061 { 3062 Mat mat; 3063 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3064 PetscErrorCode ierr; 3065 3066 PetscFunctionBegin; 3067 *newmat = NULL; 3068 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3069 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3070 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3071 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3072 a = (Mat_MPIAIJ*)mat->data; 3073 3074 mat->factortype = matin->factortype; 3075 mat->assembled = matin->assembled; 3076 mat->insertmode = NOT_SET_VALUES; 3077 mat->preallocated = matin->preallocated; 3078 3079 a->size = oldmat->size; 3080 a->rank = oldmat->rank; 3081 a->donotstash = oldmat->donotstash; 3082 a->roworiented = oldmat->roworiented; 3083 a->rowindices = NULL; 3084 a->rowvalues = NULL; 3085 a->getrowactive = PETSC_FALSE; 3086 3087 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3088 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3089 3090 if (oldmat->colmap) { 3091 #if defined(PETSC_USE_CTABLE) 3092 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3093 #else 3094 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3095 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3096 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3097 #endif 3098 } else a->colmap = NULL; 3099 if (oldmat->garray) { 3100 PetscInt len; 3101 len = oldmat->B->cmap->n; 3102 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3103 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3104 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3105 } else a->garray = NULL; 3106 3107 /* It may happen MatDuplicate is called with a non-assembled matrix 3108 In fact, MatDuplicate only requires the matrix to be preallocated 3109 This may happen inside a DMCreateMatrix_Shell */ 3110 if (oldmat->lvec) { 3111 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3112 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3113 } 3114 if (oldmat->Mvctx) { 3115 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3116 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3117 } 3118 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3119 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3120 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3122 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3123 *newmat = mat; 3124 PetscFunctionReturn(0); 3125 } 3126 3127 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3128 { 3129 PetscBool isbinary, ishdf5; 3130 PetscErrorCode ierr; 3131 3132 PetscFunctionBegin; 3133 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3134 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3135 /* force binary viewer to load .info file if it has not yet done so */ 3136 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3137 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3138 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3139 if (isbinary) { 3140 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3141 } else if (ishdf5) { 3142 #if defined(PETSC_HAVE_HDF5) 3143 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3144 #else 3145 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3146 #endif 3147 } else { 3148 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3149 } 3150 PetscFunctionReturn(0); 3151 } 3152 3153 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3154 { 3155 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3156 PetscInt *rowidxs,*colidxs; 3157 PetscScalar *matvals; 3158 PetscErrorCode ierr; 3159 3160 PetscFunctionBegin; 3161 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3162 3163 /* read in matrix header */ 3164 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3165 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3166 M = header[1]; N = header[2]; nz = header[3]; 3167 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3168 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3169 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3170 3171 /* set block sizes from the viewer's .info file */ 3172 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3173 /* set global sizes if not set already */ 3174 if (mat->rmap->N < 0) mat->rmap->N = M; 3175 if (mat->cmap->N < 0) mat->cmap->N = N; 3176 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3177 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3178 3179 /* check if the matrix sizes are correct */ 3180 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3181 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3182 3183 /* read in row lengths and build row indices */ 3184 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3185 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3186 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3187 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3188 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3189 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3190 /* read in column indices and matrix values */ 3191 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3192 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3193 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3194 /* store matrix indices and values */ 3195 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3196 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3197 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 3201 /* Not scalable because of ISAllGather() unless getting all columns. */ 3202 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3203 { 3204 PetscErrorCode ierr; 3205 IS iscol_local; 3206 PetscBool isstride; 3207 PetscMPIInt lisstride=0,gisstride; 3208 3209 PetscFunctionBegin; 3210 /* check if we are grabbing all columns*/ 3211 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3212 3213 if (isstride) { 3214 PetscInt start,len,mstart,mlen; 3215 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3216 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3217 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3218 if (mstart == start && mlen-mstart == len) lisstride = 1; 3219 } 3220 3221 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3222 if (gisstride) { 3223 PetscInt N; 3224 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3225 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3226 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3227 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3228 } else { 3229 PetscInt cbs; 3230 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3231 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3232 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3233 } 3234 3235 *isseq = iscol_local; 3236 PetscFunctionReturn(0); 3237 } 3238 3239 /* 3240 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3241 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3242 3243 Input Parameters: 3244 mat - matrix 3245 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3246 i.e., mat->rstart <= isrow[i] < mat->rend 3247 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3248 i.e., mat->cstart <= iscol[i] < mat->cend 3249 Output Parameter: 3250 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3251 iscol_o - sequential column index set for retrieving mat->B 3252 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3253 */ 3254 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3255 { 3256 PetscErrorCode ierr; 3257 Vec x,cmap; 3258 const PetscInt *is_idx; 3259 PetscScalar *xarray,*cmaparray; 3260 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3261 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3262 Mat B=a->B; 3263 Vec lvec=a->lvec,lcmap; 3264 PetscInt i,cstart,cend,Bn=B->cmap->N; 3265 MPI_Comm comm; 3266 VecScatter Mvctx=a->Mvctx; 3267 3268 PetscFunctionBegin; 3269 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3270 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3271 3272 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3273 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3274 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3275 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3276 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3277 3278 /* Get start indices */ 3279 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 3280 isstart -= ncols; 3281 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3282 3283 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3284 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3285 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3286 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3287 for (i=0; i<ncols; i++) { 3288 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3289 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3290 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3291 } 3292 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3293 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3294 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3295 3296 /* Get iscol_d */ 3297 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3298 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3299 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3300 3301 /* Get isrow_d */ 3302 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3303 rstart = mat->rmap->rstart; 3304 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3305 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3306 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3307 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3308 3309 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3310 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3311 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3312 3313 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3314 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3315 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3316 3317 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3318 3319 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3320 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3321 3322 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3323 /* off-process column indices */ 3324 count = 0; 3325 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3326 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3327 3328 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3329 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3330 for (i=0; i<Bn; i++) { 3331 if (PetscRealPart(xarray[i]) > -1.0) { 3332 idx[count] = i; /* local column index in off-diagonal part B */ 3333 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3334 count++; 3335 } 3336 } 3337 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3338 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3339 3340 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3341 /* cannot ensure iscol_o has same blocksize as iscol! */ 3342 3343 ierr = PetscFree(idx);CHKERRQ(ierr); 3344 *garray = cmap1; 3345 3346 ierr = VecDestroy(&x);CHKERRQ(ierr); 3347 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3348 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } 3351 3352 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3353 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3354 { 3355 PetscErrorCode ierr; 3356 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3357 Mat M = NULL; 3358 MPI_Comm comm; 3359 IS iscol_d,isrow_d,iscol_o; 3360 Mat Asub = NULL,Bsub = NULL; 3361 PetscInt n; 3362 3363 PetscFunctionBegin; 3364 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3365 3366 if (call == MAT_REUSE_MATRIX) { 3367 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3368 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3369 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3370 3371 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3372 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3373 3374 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3375 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3376 3377 /* Update diagonal and off-diagonal portions of submat */ 3378 asub = (Mat_MPIAIJ*)(*submat)->data; 3379 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3380 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3381 if (n) { 3382 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3383 } 3384 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3385 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3386 3387 } else { /* call == MAT_INITIAL_MATRIX) */ 3388 const PetscInt *garray; 3389 PetscInt BsubN; 3390 3391 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3392 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3393 3394 /* Create local submatrices Asub and Bsub */ 3395 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3396 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3397 3398 /* Create submatrix M */ 3399 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3400 3401 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3402 asub = (Mat_MPIAIJ*)M->data; 3403 3404 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3405 n = asub->B->cmap->N; 3406 if (BsubN > n) { 3407 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3408 const PetscInt *idx; 3409 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3410 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3411 3412 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3413 j = 0; 3414 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3415 for (i=0; i<n; i++) { 3416 if (j >= BsubN) break; 3417 while (subgarray[i] > garray[j]) j++; 3418 3419 if (subgarray[i] == garray[j]) { 3420 idx_new[i] = idx[j++]; 3421 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3422 } 3423 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3424 3425 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3426 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3427 3428 } else if (BsubN < n) { 3429 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3430 } 3431 3432 ierr = PetscFree(garray);CHKERRQ(ierr); 3433 *submat = M; 3434 3435 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3436 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3437 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3438 3439 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3440 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3441 3442 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3443 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3444 } 3445 PetscFunctionReturn(0); 3446 } 3447 3448 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3449 { 3450 PetscErrorCode ierr; 3451 IS iscol_local=NULL,isrow_d; 3452 PetscInt csize; 3453 PetscInt n,i,j,start,end; 3454 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3455 MPI_Comm comm; 3456 3457 PetscFunctionBegin; 3458 /* If isrow has same processor distribution as mat, 3459 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3460 if (call == MAT_REUSE_MATRIX) { 3461 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3462 if (isrow_d) { 3463 sameRowDist = PETSC_TRUE; 3464 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3465 } else { 3466 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3467 if (iscol_local) { 3468 sameRowDist = PETSC_TRUE; 3469 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3470 } 3471 } 3472 } else { 3473 /* Check if isrow has same processor distribution as mat */ 3474 sameDist[0] = PETSC_FALSE; 3475 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3476 if (!n) { 3477 sameDist[0] = PETSC_TRUE; 3478 } else { 3479 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3480 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3481 if (i >= start && j < end) { 3482 sameDist[0] = PETSC_TRUE; 3483 } 3484 } 3485 3486 /* Check if iscol has same processor distribution as mat */ 3487 sameDist[1] = PETSC_FALSE; 3488 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3489 if (!n) { 3490 sameDist[1] = PETSC_TRUE; 3491 } else { 3492 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3493 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3494 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3495 } 3496 3497 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3498 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3499 sameRowDist = tsameDist[0]; 3500 } 3501 3502 if (sameRowDist) { 3503 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3504 /* isrow and iscol have same processor distribution as mat */ 3505 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3506 PetscFunctionReturn(0); 3507 } else { /* sameRowDist */ 3508 /* isrow has same processor distribution as mat */ 3509 if (call == MAT_INITIAL_MATRIX) { 3510 PetscBool sorted; 3511 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3512 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3513 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3514 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3515 3516 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3517 if (sorted) { 3518 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3519 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 } else { /* call == MAT_REUSE_MATRIX */ 3523 IS iscol_sub; 3524 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3525 if (iscol_sub) { 3526 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3527 PetscFunctionReturn(0); 3528 } 3529 } 3530 } 3531 } 3532 3533 /* General case: iscol -> iscol_local which has global size of iscol */ 3534 if (call == MAT_REUSE_MATRIX) { 3535 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3536 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3537 } else { 3538 if (!iscol_local) { 3539 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3540 } 3541 } 3542 3543 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3544 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3545 3546 if (call == MAT_INITIAL_MATRIX) { 3547 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3548 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3549 } 3550 PetscFunctionReturn(0); 3551 } 3552 3553 /*@C 3554 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3555 and "off-diagonal" part of the matrix in CSR format. 3556 3557 Collective 3558 3559 Input Parameters: 3560 + comm - MPI communicator 3561 . A - "diagonal" portion of matrix 3562 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3563 - garray - global index of B columns 3564 3565 Output Parameter: 3566 . mat - the matrix, with input A as its local diagonal matrix 3567 Level: advanced 3568 3569 Notes: 3570 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3571 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3572 3573 .seealso: MatCreateMPIAIJWithSplitArrays() 3574 @*/ 3575 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3576 { 3577 PetscErrorCode ierr; 3578 Mat_MPIAIJ *maij; 3579 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3580 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3581 PetscScalar *oa=b->a; 3582 Mat Bnew; 3583 PetscInt m,n,N; 3584 3585 PetscFunctionBegin; 3586 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3587 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3588 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3589 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3590 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3591 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3592 3593 /* Get global columns of mat */ 3594 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3595 3596 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3597 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3598 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3599 maij = (Mat_MPIAIJ*)(*mat)->data; 3600 3601 (*mat)->preallocated = PETSC_TRUE; 3602 3603 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3604 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3605 3606 /* Set A as diagonal portion of *mat */ 3607 maij->A = A; 3608 3609 nz = oi[m]; 3610 for (i=0; i<nz; i++) { 3611 col = oj[i]; 3612 oj[i] = garray[col]; 3613 } 3614 3615 /* Set Bnew as off-diagonal portion of *mat */ 3616 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3617 bnew = (Mat_SeqAIJ*)Bnew->data; 3618 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3619 maij->B = Bnew; 3620 3621 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3622 3623 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3624 b->free_a = PETSC_FALSE; 3625 b->free_ij = PETSC_FALSE; 3626 ierr = MatDestroy(&B);CHKERRQ(ierr); 3627 3628 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3629 bnew->free_a = PETSC_TRUE; 3630 bnew->free_ij = PETSC_TRUE; 3631 3632 /* condense columns of maij->B */ 3633 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3634 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3635 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3636 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3637 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3638 PetscFunctionReturn(0); 3639 } 3640 3641 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3642 3643 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3644 { 3645 PetscErrorCode ierr; 3646 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3647 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3648 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3649 Mat M,Msub,B=a->B; 3650 MatScalar *aa; 3651 Mat_SeqAIJ *aij; 3652 PetscInt *garray = a->garray,*colsub,Ncols; 3653 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3654 IS iscol_sub,iscmap; 3655 const PetscInt *is_idx,*cmap; 3656 PetscBool allcolumns=PETSC_FALSE; 3657 MPI_Comm comm; 3658 3659 PetscFunctionBegin; 3660 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3661 if (call == MAT_REUSE_MATRIX) { 3662 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3663 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3664 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3665 3666 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3667 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3668 3669 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3670 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3671 3672 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3673 3674 } else { /* call == MAT_INITIAL_MATRIX) */ 3675 PetscBool flg; 3676 3677 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3678 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3679 3680 /* (1) iscol -> nonscalable iscol_local */ 3681 /* Check for special case: each processor gets entire matrix columns */ 3682 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3683 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3684 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3685 if (allcolumns) { 3686 iscol_sub = iscol_local; 3687 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3688 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3689 3690 } else { 3691 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3692 PetscInt *idx,*cmap1,k; 3693 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3694 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3695 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3696 count = 0; 3697 k = 0; 3698 for (i=0; i<Ncols; i++) { 3699 j = is_idx[i]; 3700 if (j >= cstart && j < cend) { 3701 /* diagonal part of mat */ 3702 idx[count] = j; 3703 cmap1[count++] = i; /* column index in submat */ 3704 } else if (Bn) { 3705 /* off-diagonal part of mat */ 3706 if (j == garray[k]) { 3707 idx[count] = j; 3708 cmap1[count++] = i; /* column index in submat */ 3709 } else if (j > garray[k]) { 3710 while (j > garray[k] && k < Bn-1) k++; 3711 if (j == garray[k]) { 3712 idx[count] = j; 3713 cmap1[count++] = i; /* column index in submat */ 3714 } 3715 } 3716 } 3717 } 3718 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3719 3720 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3721 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3722 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3723 3724 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3725 } 3726 3727 /* (3) Create sequential Msub */ 3728 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3729 } 3730 3731 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3732 aij = (Mat_SeqAIJ*)(Msub)->data; 3733 ii = aij->i; 3734 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3735 3736 /* 3737 m - number of local rows 3738 Ncols - number of columns (same on all processors) 3739 rstart - first row in new global matrix generated 3740 */ 3741 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3742 3743 if (call == MAT_INITIAL_MATRIX) { 3744 /* (4) Create parallel newmat */ 3745 PetscMPIInt rank,size; 3746 PetscInt csize; 3747 3748 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3749 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3750 3751 /* 3752 Determine the number of non-zeros in the diagonal and off-diagonal 3753 portions of the matrix in order to do correct preallocation 3754 */ 3755 3756 /* first get start and end of "diagonal" columns */ 3757 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3758 if (csize == PETSC_DECIDE) { 3759 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3760 if (mglobal == Ncols) { /* square matrix */ 3761 nlocal = m; 3762 } else { 3763 nlocal = Ncols/size + ((Ncols % size) > rank); 3764 } 3765 } else { 3766 nlocal = csize; 3767 } 3768 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3769 rstart = rend - nlocal; 3770 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3771 3772 /* next, compute all the lengths */ 3773 jj = aij->j; 3774 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3775 olens = dlens + m; 3776 for (i=0; i<m; i++) { 3777 jend = ii[i+1] - ii[i]; 3778 olen = 0; 3779 dlen = 0; 3780 for (j=0; j<jend; j++) { 3781 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3782 else dlen++; 3783 jj++; 3784 } 3785 olens[i] = olen; 3786 dlens[i] = dlen; 3787 } 3788 3789 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3790 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3791 3792 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3793 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3794 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3795 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3796 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3797 ierr = PetscFree(dlens);CHKERRQ(ierr); 3798 3799 } else { /* call == MAT_REUSE_MATRIX */ 3800 M = *newmat; 3801 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3802 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3803 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3804 /* 3805 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3806 rather than the slower MatSetValues(). 3807 */ 3808 M->was_assembled = PETSC_TRUE; 3809 M->assembled = PETSC_FALSE; 3810 } 3811 3812 /* (5) Set values of Msub to *newmat */ 3813 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3814 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3815 3816 jj = aij->j; 3817 ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3818 for (i=0; i<m; i++) { 3819 row = rstart + i; 3820 nz = ii[i+1] - ii[i]; 3821 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3822 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3823 jj += nz; aa += nz; 3824 } 3825 ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr); 3826 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3827 3828 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 3831 ierr = PetscFree(colsub);CHKERRQ(ierr); 3832 3833 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3834 if (call == MAT_INITIAL_MATRIX) { 3835 *newmat = M; 3836 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3837 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3838 3839 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3840 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3841 3842 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3843 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3844 3845 if (iscol_local) { 3846 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3847 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3848 } 3849 } 3850 PetscFunctionReturn(0); 3851 } 3852 3853 /* 3854 Not great since it makes two copies of the submatrix, first an SeqAIJ 3855 in local and then by concatenating the local matrices the end result. 3856 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3857 3858 Note: This requires a sequential iscol with all indices. 3859 */ 3860 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3861 { 3862 PetscErrorCode ierr; 3863 PetscMPIInt rank,size; 3864 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3865 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3866 Mat M,Mreuse; 3867 MatScalar *aa,*vwork; 3868 MPI_Comm comm; 3869 Mat_SeqAIJ *aij; 3870 PetscBool colflag,allcolumns=PETSC_FALSE; 3871 3872 PetscFunctionBegin; 3873 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3874 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3875 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3876 3877 /* Check for special case: each processor gets entire matrix columns */ 3878 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3879 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3880 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3881 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3882 3883 if (call == MAT_REUSE_MATRIX) { 3884 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3885 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3886 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3887 } else { 3888 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3889 } 3890 3891 /* 3892 m - number of local rows 3893 n - number of columns (same on all processors) 3894 rstart - first row in new global matrix generated 3895 */ 3896 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3897 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3898 if (call == MAT_INITIAL_MATRIX) { 3899 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3900 ii = aij->i; 3901 jj = aij->j; 3902 3903 /* 3904 Determine the number of non-zeros in the diagonal and off-diagonal 3905 portions of the matrix in order to do correct preallocation 3906 */ 3907 3908 /* first get start and end of "diagonal" columns */ 3909 if (csize == PETSC_DECIDE) { 3910 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3911 if (mglobal == n) { /* square matrix */ 3912 nlocal = m; 3913 } else { 3914 nlocal = n/size + ((n % size) > rank); 3915 } 3916 } else { 3917 nlocal = csize; 3918 } 3919 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3920 rstart = rend - nlocal; 3921 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3922 3923 /* next, compute all the lengths */ 3924 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3925 olens = dlens + m; 3926 for (i=0; i<m; i++) { 3927 jend = ii[i+1] - ii[i]; 3928 olen = 0; 3929 dlen = 0; 3930 for (j=0; j<jend; j++) { 3931 if (*jj < rstart || *jj >= rend) olen++; 3932 else dlen++; 3933 jj++; 3934 } 3935 olens[i] = olen; 3936 dlens[i] = dlen; 3937 } 3938 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3939 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3940 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3941 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3942 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3943 ierr = PetscFree(dlens);CHKERRQ(ierr); 3944 } else { 3945 PetscInt ml,nl; 3946 3947 M = *newmat; 3948 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3949 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3950 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3951 /* 3952 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3953 rather than the slower MatSetValues(). 3954 */ 3955 M->was_assembled = PETSC_TRUE; 3956 M->assembled = PETSC_FALSE; 3957 } 3958 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3959 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3960 ii = aij->i; 3961 jj = aij->j; 3962 3963 /* trigger copy to CPU if needed */ 3964 ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3965 for (i=0; i<m; i++) { 3966 row = rstart + i; 3967 nz = ii[i+1] - ii[i]; 3968 cwork = jj; jj += nz; 3969 vwork = aa; aa += nz; 3970 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3971 } 3972 ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr); 3973 3974 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 *newmat = M; 3977 3978 /* save submatrix used in processor for next request */ 3979 if (call == MAT_INITIAL_MATRIX) { 3980 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3981 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3982 } 3983 PetscFunctionReturn(0); 3984 } 3985 3986 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3987 { 3988 PetscInt m,cstart, cend,j,nnz,i,d; 3989 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3990 const PetscInt *JJ; 3991 PetscErrorCode ierr; 3992 PetscBool nooffprocentries; 3993 3994 PetscFunctionBegin; 3995 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3996 3997 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3998 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3999 m = B->rmap->n; 4000 cstart = B->cmap->rstart; 4001 cend = B->cmap->rend; 4002 rstart = B->rmap->rstart; 4003 4004 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 4005 4006 if (PetscDefined(USE_DEBUG)) { 4007 for (i=0; i<m; i++) { 4008 nnz = Ii[i+1]- Ii[i]; 4009 JJ = J + Ii[i]; 4010 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4011 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 4012 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4013 } 4014 } 4015 4016 for (i=0; i<m; i++) { 4017 nnz = Ii[i+1]- Ii[i]; 4018 JJ = J + Ii[i]; 4019 nnz_max = PetscMax(nnz_max,nnz); 4020 d = 0; 4021 for (j=0; j<nnz; j++) { 4022 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4023 } 4024 d_nnz[i] = d; 4025 o_nnz[i] = nnz - d; 4026 } 4027 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4028 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4029 4030 for (i=0; i<m; i++) { 4031 ii = i + rstart; 4032 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4033 } 4034 nooffprocentries = B->nooffprocentries; 4035 B->nooffprocentries = PETSC_TRUE; 4036 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4037 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4038 B->nooffprocentries = nooffprocentries; 4039 4040 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4041 PetscFunctionReturn(0); 4042 } 4043 4044 /*@ 4045 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4046 (the default parallel PETSc format). 4047 4048 Collective 4049 4050 Input Parameters: 4051 + B - the matrix 4052 . i - the indices into j for the start of each local row (starts with zero) 4053 . j - the column indices for each local row (starts with zero) 4054 - v - optional values in the matrix 4055 4056 Level: developer 4057 4058 Notes: 4059 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4060 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4061 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4062 4063 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4064 4065 The format which is used for the sparse matrix input, is equivalent to a 4066 row-major ordering.. i.e for the following matrix, the input data expected is 4067 as shown 4068 4069 $ 1 0 0 4070 $ 2 0 3 P0 4071 $ ------- 4072 $ 4 5 6 P1 4073 $ 4074 $ Process0 [P0]: rows_owned=[0,1] 4075 $ i = {0,1,3} [size = nrow+1 = 2+1] 4076 $ j = {0,0,2} [size = 3] 4077 $ v = {1,2,3} [size = 3] 4078 $ 4079 $ Process1 [P1]: rows_owned=[2] 4080 $ i = {0,3} [size = nrow+1 = 1+1] 4081 $ j = {0,1,2} [size = 3] 4082 $ v = {4,5,6} [size = 3] 4083 4084 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4085 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4086 @*/ 4087 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4088 { 4089 PetscErrorCode ierr; 4090 4091 PetscFunctionBegin; 4092 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4093 PetscFunctionReturn(0); 4094 } 4095 4096 /*@C 4097 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4098 (the default parallel PETSc format). For good matrix assembly performance 4099 the user should preallocate the matrix storage by setting the parameters 4100 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4101 performance can be increased by more than a factor of 50. 4102 4103 Collective 4104 4105 Input Parameters: 4106 + B - the matrix 4107 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4108 (same value is used for all local rows) 4109 . d_nnz - array containing the number of nonzeros in the various rows of the 4110 DIAGONAL portion of the local submatrix (possibly different for each row) 4111 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4112 The size of this array is equal to the number of local rows, i.e 'm'. 4113 For matrices that will be factored, you must leave room for (and set) 4114 the diagonal entry even if it is zero. 4115 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4116 submatrix (same value is used for all local rows). 4117 - o_nnz - array containing the number of nonzeros in the various rows of the 4118 OFF-DIAGONAL portion of the local submatrix (possibly different for 4119 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4120 structure. The size of this array is equal to the number 4121 of local rows, i.e 'm'. 4122 4123 If the *_nnz parameter is given then the *_nz parameter is ignored 4124 4125 The AIJ format (also called the Yale sparse matrix format or 4126 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4127 storage. The stored row and column indices begin with zero. 4128 See Users-Manual: ch_mat for details. 4129 4130 The parallel matrix is partitioned such that the first m0 rows belong to 4131 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4132 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4133 4134 The DIAGONAL portion of the local submatrix of a processor can be defined 4135 as the submatrix which is obtained by extraction the part corresponding to 4136 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4137 first row that belongs to the processor, r2 is the last row belonging to 4138 the this processor, and c1-c2 is range of indices of the local part of a 4139 vector suitable for applying the matrix to. This is an mxn matrix. In the 4140 common case of a square matrix, the row and column ranges are the same and 4141 the DIAGONAL part is also square. The remaining portion of the local 4142 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4143 4144 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4145 4146 You can call MatGetInfo() to get information on how effective the preallocation was; 4147 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4148 You can also run with the option -info and look for messages with the string 4149 malloc in them to see if additional memory allocation was needed. 4150 4151 Example usage: 4152 4153 Consider the following 8x8 matrix with 34 non-zero values, that is 4154 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4155 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4156 as follows: 4157 4158 .vb 4159 1 2 0 | 0 3 0 | 0 4 4160 Proc0 0 5 6 | 7 0 0 | 8 0 4161 9 0 10 | 11 0 0 | 12 0 4162 ------------------------------------- 4163 13 0 14 | 15 16 17 | 0 0 4164 Proc1 0 18 0 | 19 20 21 | 0 0 4165 0 0 0 | 22 23 0 | 24 0 4166 ------------------------------------- 4167 Proc2 25 26 27 | 0 0 28 | 29 0 4168 30 0 0 | 31 32 33 | 0 34 4169 .ve 4170 4171 This can be represented as a collection of submatrices as: 4172 4173 .vb 4174 A B C 4175 D E F 4176 G H I 4177 .ve 4178 4179 Where the submatrices A,B,C are owned by proc0, D,E,F are 4180 owned by proc1, G,H,I are owned by proc2. 4181 4182 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4183 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4184 The 'M','N' parameters are 8,8, and have the same values on all procs. 4185 4186 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4187 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4188 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4189 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4190 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4191 matrix, ans [DF] as another SeqAIJ matrix. 4192 4193 When d_nz, o_nz parameters are specified, d_nz storage elements are 4194 allocated for every row of the local diagonal submatrix, and o_nz 4195 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4196 One way to choose d_nz and o_nz is to use the max nonzerors per local 4197 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4198 In this case, the values of d_nz,o_nz are: 4199 .vb 4200 proc0 : dnz = 2, o_nz = 2 4201 proc1 : dnz = 3, o_nz = 2 4202 proc2 : dnz = 1, o_nz = 4 4203 .ve 4204 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4205 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4206 for proc3. i.e we are using 12+15+10=37 storage locations to store 4207 34 values. 4208 4209 When d_nnz, o_nnz parameters are specified, the storage is specified 4210 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4211 In the above case the values for d_nnz,o_nnz are: 4212 .vb 4213 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4214 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4215 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4216 .ve 4217 Here the space allocated is sum of all the above values i.e 34, and 4218 hence pre-allocation is perfect. 4219 4220 Level: intermediate 4221 4222 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4223 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4224 @*/ 4225 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4226 { 4227 PetscErrorCode ierr; 4228 4229 PetscFunctionBegin; 4230 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4231 PetscValidType(B,1); 4232 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4233 PetscFunctionReturn(0); 4234 } 4235 4236 /*@ 4237 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4238 CSR format for the local rows. 4239 4240 Collective 4241 4242 Input Parameters: 4243 + comm - MPI communicator 4244 . m - number of local rows (Cannot be PETSC_DECIDE) 4245 . n - This value should be the same as the local size used in creating the 4246 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4247 calculated if N is given) For square matrices n is almost always m. 4248 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4249 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4250 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4251 . j - column indices 4252 - a - matrix values 4253 4254 Output Parameter: 4255 . mat - the matrix 4256 4257 Level: intermediate 4258 4259 Notes: 4260 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4261 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4262 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4263 4264 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4265 4266 The format which is used for the sparse matrix input, is equivalent to a 4267 row-major ordering.. i.e for the following matrix, the input data expected is 4268 as shown 4269 4270 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4271 4272 $ 1 0 0 4273 $ 2 0 3 P0 4274 $ ------- 4275 $ 4 5 6 P1 4276 $ 4277 $ Process0 [P0]: rows_owned=[0,1] 4278 $ i = {0,1,3} [size = nrow+1 = 2+1] 4279 $ j = {0,0,2} [size = 3] 4280 $ v = {1,2,3} [size = 3] 4281 $ 4282 $ Process1 [P1]: rows_owned=[2] 4283 $ i = {0,3} [size = nrow+1 = 1+1] 4284 $ j = {0,1,2} [size = 3] 4285 $ v = {4,5,6} [size = 3] 4286 4287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4288 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4289 @*/ 4290 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4291 { 4292 PetscErrorCode ierr; 4293 4294 PetscFunctionBegin; 4295 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4296 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4297 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4298 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4299 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4300 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4301 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4302 PetscFunctionReturn(0); 4303 } 4304 4305 /*@ 4306 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4307 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4308 4309 Collective 4310 4311 Input Parameters: 4312 + mat - the matrix 4313 . m - number of local rows (Cannot be PETSC_DECIDE) 4314 . n - This value should be the same as the local size used in creating the 4315 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4316 calculated if N is given) For square matrices n is almost always m. 4317 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4318 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4319 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4320 . J - column indices 4321 - v - matrix values 4322 4323 Level: intermediate 4324 4325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4326 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4327 @*/ 4328 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4329 { 4330 PetscErrorCode ierr; 4331 PetscInt cstart,nnz,i,j; 4332 PetscInt *ld; 4333 PetscBool nooffprocentries; 4334 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4335 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4336 PetscScalar *ad = Ad->a, *ao = Ao->a; 4337 const PetscInt *Adi = Ad->i; 4338 PetscInt ldi,Iii,md; 4339 4340 PetscFunctionBegin; 4341 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4342 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4343 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4344 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4345 4346 cstart = mat->cmap->rstart; 4347 if (!Aij->ld) { 4348 /* count number of entries below block diagonal */ 4349 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4350 Aij->ld = ld; 4351 for (i=0; i<m; i++) { 4352 nnz = Ii[i+1]- Ii[i]; 4353 j = 0; 4354 while (J[j] < cstart && j < nnz) {j++;} 4355 J += nnz; 4356 ld[i] = j; 4357 } 4358 } else { 4359 ld = Aij->ld; 4360 } 4361 4362 for (i=0; i<m; i++) { 4363 nnz = Ii[i+1]- Ii[i]; 4364 Iii = Ii[i]; 4365 ldi = ld[i]; 4366 md = Adi[i+1]-Adi[i]; 4367 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4368 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4369 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4370 ad += md; 4371 ao += nnz - md; 4372 } 4373 nooffprocentries = mat->nooffprocentries; 4374 mat->nooffprocentries = PETSC_TRUE; 4375 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4376 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4377 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4378 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4379 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4380 mat->nooffprocentries = nooffprocentries; 4381 PetscFunctionReturn(0); 4382 } 4383 4384 /*@C 4385 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4386 (the default parallel PETSc format). For good matrix assembly performance 4387 the user should preallocate the matrix storage by setting the parameters 4388 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4389 performance can be increased by more than a factor of 50. 4390 4391 Collective 4392 4393 Input Parameters: 4394 + comm - MPI communicator 4395 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4396 This value should be the same as the local size used in creating the 4397 y vector for the matrix-vector product y = Ax. 4398 . n - This value should be the same as the local size used in creating the 4399 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4400 calculated if N is given) For square matrices n is almost always m. 4401 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4402 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4403 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4404 (same value is used for all local rows) 4405 . d_nnz - array containing the number of nonzeros in the various rows of the 4406 DIAGONAL portion of the local submatrix (possibly different for each row) 4407 or NULL, if d_nz is used to specify the nonzero structure. 4408 The size of this array is equal to the number of local rows, i.e 'm'. 4409 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4410 submatrix (same value is used for all local rows). 4411 - o_nnz - array containing the number of nonzeros in the various rows of the 4412 OFF-DIAGONAL portion of the local submatrix (possibly different for 4413 each row) or NULL, if o_nz is used to specify the nonzero 4414 structure. The size of this array is equal to the number 4415 of local rows, i.e 'm'. 4416 4417 Output Parameter: 4418 . A - the matrix 4419 4420 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4421 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4422 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4423 4424 Notes: 4425 If the *_nnz parameter is given then the *_nz parameter is ignored 4426 4427 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4428 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4429 storage requirements for this matrix. 4430 4431 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4432 processor than it must be used on all processors that share the object for 4433 that argument. 4434 4435 The user MUST specify either the local or global matrix dimensions 4436 (possibly both). 4437 4438 The parallel matrix is partitioned across processors such that the 4439 first m0 rows belong to process 0, the next m1 rows belong to 4440 process 1, the next m2 rows belong to process 2 etc.. where 4441 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4442 values corresponding to [m x N] submatrix. 4443 4444 The columns are logically partitioned with the n0 columns belonging 4445 to 0th partition, the next n1 columns belonging to the next 4446 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4447 4448 The DIAGONAL portion of the local submatrix on any given processor 4449 is the submatrix corresponding to the rows and columns m,n 4450 corresponding to the given processor. i.e diagonal matrix on 4451 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4452 etc. The remaining portion of the local submatrix [m x (N-n)] 4453 constitute the OFF-DIAGONAL portion. The example below better 4454 illustrates this concept. 4455 4456 For a square global matrix we define each processor's diagonal portion 4457 to be its local rows and the corresponding columns (a square submatrix); 4458 each processor's off-diagonal portion encompasses the remainder of the 4459 local matrix (a rectangular submatrix). 4460 4461 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4462 4463 When calling this routine with a single process communicator, a matrix of 4464 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4465 type of communicator, use the construction mechanism 4466 .vb 4467 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4468 .ve 4469 4470 $ MatCreate(...,&A); 4471 $ MatSetType(A,MATMPIAIJ); 4472 $ MatSetSizes(A, m,n,M,N); 4473 $ MatMPIAIJSetPreallocation(A,...); 4474 4475 By default, this format uses inodes (identical nodes) when possible. 4476 We search for consecutive rows with the same nonzero structure, thereby 4477 reusing matrix information to achieve increased efficiency. 4478 4479 Options Database Keys: 4480 + -mat_no_inode - Do not use inodes 4481 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4482 4483 4484 4485 Example usage: 4486 4487 Consider the following 8x8 matrix with 34 non-zero values, that is 4488 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4489 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4490 as follows 4491 4492 .vb 4493 1 2 0 | 0 3 0 | 0 4 4494 Proc0 0 5 6 | 7 0 0 | 8 0 4495 9 0 10 | 11 0 0 | 12 0 4496 ------------------------------------- 4497 13 0 14 | 15 16 17 | 0 0 4498 Proc1 0 18 0 | 19 20 21 | 0 0 4499 0 0 0 | 22 23 0 | 24 0 4500 ------------------------------------- 4501 Proc2 25 26 27 | 0 0 28 | 29 0 4502 30 0 0 | 31 32 33 | 0 34 4503 .ve 4504 4505 This can be represented as a collection of submatrices as 4506 4507 .vb 4508 A B C 4509 D E F 4510 G H I 4511 .ve 4512 4513 Where the submatrices A,B,C are owned by proc0, D,E,F are 4514 owned by proc1, G,H,I are owned by proc2. 4515 4516 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4517 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4518 The 'M','N' parameters are 8,8, and have the same values on all procs. 4519 4520 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4521 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4522 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4523 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4524 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4525 matrix, ans [DF] as another SeqAIJ matrix. 4526 4527 When d_nz, o_nz parameters are specified, d_nz storage elements are 4528 allocated for every row of the local diagonal submatrix, and o_nz 4529 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4530 One way to choose d_nz and o_nz is to use the max nonzerors per local 4531 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4532 In this case, the values of d_nz,o_nz are 4533 .vb 4534 proc0 : dnz = 2, o_nz = 2 4535 proc1 : dnz = 3, o_nz = 2 4536 proc2 : dnz = 1, o_nz = 4 4537 .ve 4538 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4539 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4540 for proc3. i.e we are using 12+15+10=37 storage locations to store 4541 34 values. 4542 4543 When d_nnz, o_nnz parameters are specified, the storage is specified 4544 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4545 In the above case the values for d_nnz,o_nnz are 4546 .vb 4547 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4548 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4549 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4550 .ve 4551 Here the space allocated is sum of all the above values i.e 34, and 4552 hence pre-allocation is perfect. 4553 4554 Level: intermediate 4555 4556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4557 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4558 @*/ 4559 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4560 { 4561 PetscErrorCode ierr; 4562 PetscMPIInt size; 4563 4564 PetscFunctionBegin; 4565 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4566 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4567 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4568 if (size > 1) { 4569 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4570 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4571 } else { 4572 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4573 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4574 } 4575 PetscFunctionReturn(0); 4576 } 4577 4578 /*@C 4579 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4580 4581 Not collective 4582 4583 Input Parameter: 4584 . A - The MPIAIJ matrix 4585 4586 Output Parameters: 4587 + Ad - The local diagonal block as a SeqAIJ matrix 4588 . Ao - The local off-diagonal block as a SeqAIJ matrix 4589 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4590 4591 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4592 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4593 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4594 local column numbers to global column numbers in the original matrix. 4595 4596 Level: intermediate 4597 4598 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4599 @*/ 4600 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4601 { 4602 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4603 PetscBool flg; 4604 PetscErrorCode ierr; 4605 4606 PetscFunctionBegin; 4607 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4608 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4609 if (Ad) *Ad = a->A; 4610 if (Ao) *Ao = a->B; 4611 if (colmap) *colmap = a->garray; 4612 PetscFunctionReturn(0); 4613 } 4614 4615 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4616 { 4617 PetscErrorCode ierr; 4618 PetscInt m,N,i,rstart,nnz,Ii; 4619 PetscInt *indx; 4620 PetscScalar *values; 4621 4622 PetscFunctionBegin; 4623 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4624 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4625 PetscInt *dnz,*onz,sum,bs,cbs; 4626 4627 if (n == PETSC_DECIDE) { 4628 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4629 } 4630 /* Check sum(n) = N */ 4631 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4632 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4633 4634 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4635 rstart -= m; 4636 4637 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4638 for (i=0; i<m; i++) { 4639 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4640 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4641 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4642 } 4643 4644 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4645 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4646 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4647 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4648 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4649 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4650 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4651 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4652 } 4653 4654 /* numeric phase */ 4655 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4656 for (i=0; i<m; i++) { 4657 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4658 Ii = i + rstart; 4659 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4660 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4661 } 4662 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4663 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4664 PetscFunctionReturn(0); 4665 } 4666 4667 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4668 { 4669 PetscErrorCode ierr; 4670 PetscMPIInt rank; 4671 PetscInt m,N,i,rstart,nnz; 4672 size_t len; 4673 const PetscInt *indx; 4674 PetscViewer out; 4675 char *name; 4676 Mat B; 4677 const PetscScalar *values; 4678 4679 PetscFunctionBegin; 4680 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4681 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4682 /* Should this be the type of the diagonal block of A? */ 4683 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4684 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4685 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4686 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4687 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4688 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4689 for (i=0; i<m; i++) { 4690 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4691 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4692 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4693 } 4694 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4695 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4696 4697 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 4698 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4699 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4700 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4701 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4702 ierr = PetscFree(name);CHKERRQ(ierr); 4703 ierr = MatView(B,out);CHKERRQ(ierr); 4704 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4705 ierr = MatDestroy(&B);CHKERRQ(ierr); 4706 PetscFunctionReturn(0); 4707 } 4708 4709 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4710 { 4711 PetscErrorCode ierr; 4712 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4713 4714 PetscFunctionBegin; 4715 if (!merge) PetscFunctionReturn(0); 4716 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4717 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4718 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4719 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4720 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4721 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4722 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4723 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4724 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4725 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4726 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4727 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4728 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4729 ierr = PetscFree(merge);CHKERRQ(ierr); 4730 PetscFunctionReturn(0); 4731 } 4732 4733 #include <../src/mat/utils/freespace.h> 4734 #include <petscbt.h> 4735 4736 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4737 { 4738 PetscErrorCode ierr; 4739 MPI_Comm comm; 4740 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4741 PetscMPIInt size,rank,taga,*len_s; 4742 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4743 PetscInt proc,m; 4744 PetscInt **buf_ri,**buf_rj; 4745 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4746 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4747 MPI_Request *s_waits,*r_waits; 4748 MPI_Status *status; 4749 MatScalar *aa=a->a; 4750 MatScalar **abuf_r,*ba_i; 4751 Mat_Merge_SeqsToMPI *merge; 4752 PetscContainer container; 4753 4754 PetscFunctionBegin; 4755 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4756 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4757 4758 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4759 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4760 4761 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4762 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4763 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4764 4765 bi = merge->bi; 4766 bj = merge->bj; 4767 buf_ri = merge->buf_ri; 4768 buf_rj = merge->buf_rj; 4769 4770 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4771 owners = merge->rowmap->range; 4772 len_s = merge->len_s; 4773 4774 /* send and recv matrix values */ 4775 /*-----------------------------*/ 4776 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4777 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4778 4779 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4780 for (proc=0,k=0; proc<size; proc++) { 4781 if (!len_s[proc]) continue; 4782 i = owners[proc]; 4783 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr); 4784 k++; 4785 } 4786 4787 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);} 4788 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);} 4789 ierr = PetscFree(status);CHKERRQ(ierr); 4790 4791 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4792 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4793 4794 /* insert mat values of mpimat */ 4795 /*----------------------------*/ 4796 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4797 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4798 4799 for (k=0; k<merge->nrecv; k++) { 4800 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4801 nrows = *(buf_ri_k[k]); 4802 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4803 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4804 } 4805 4806 /* set values of ba */ 4807 m = merge->rowmap->n; 4808 for (i=0; i<m; i++) { 4809 arow = owners[rank] + i; 4810 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4811 bnzi = bi[i+1] - bi[i]; 4812 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4813 4814 /* add local non-zero vals of this proc's seqmat into ba */ 4815 anzi = ai[arow+1] - ai[arow]; 4816 aj = a->j + ai[arow]; 4817 aa = a->a + ai[arow]; 4818 nextaj = 0; 4819 for (j=0; nextaj<anzi; j++) { 4820 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4821 ba_i[j] += aa[nextaj++]; 4822 } 4823 } 4824 4825 /* add received vals into ba */ 4826 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4827 /* i-th row */ 4828 if (i == *nextrow[k]) { 4829 anzi = *(nextai[k]+1) - *nextai[k]; 4830 aj = buf_rj[k] + *(nextai[k]); 4831 aa = abuf_r[k] + *(nextai[k]); 4832 nextaj = 0; 4833 for (j=0; nextaj<anzi; j++) { 4834 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4835 ba_i[j] += aa[nextaj++]; 4836 } 4837 } 4838 nextrow[k]++; nextai[k]++; 4839 } 4840 } 4841 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4842 } 4843 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4844 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4845 4846 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4847 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4848 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4849 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4850 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4851 PetscFunctionReturn(0); 4852 } 4853 4854 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4855 { 4856 PetscErrorCode ierr; 4857 Mat B_mpi; 4858 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4859 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4860 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4861 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4862 PetscInt len,proc,*dnz,*onz,bs,cbs; 4863 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4864 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4865 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4866 MPI_Status *status; 4867 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4868 PetscBT lnkbt; 4869 Mat_Merge_SeqsToMPI *merge; 4870 PetscContainer container; 4871 4872 PetscFunctionBegin; 4873 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4874 4875 /* make sure it is a PETSc comm */ 4876 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4877 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 4878 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 4879 4880 ierr = PetscNew(&merge);CHKERRQ(ierr); 4881 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4882 4883 /* determine row ownership */ 4884 /*---------------------------------------------------------*/ 4885 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4886 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4887 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4888 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4889 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4890 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4891 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4892 4893 m = merge->rowmap->n; 4894 owners = merge->rowmap->range; 4895 4896 /* determine the number of messages to send, their lengths */ 4897 /*---------------------------------------------------------*/ 4898 len_s = merge->len_s; 4899 4900 len = 0; /* length of buf_si[] */ 4901 merge->nsend = 0; 4902 for (proc=0; proc<size; proc++) { 4903 len_si[proc] = 0; 4904 if (proc == rank) { 4905 len_s[proc] = 0; 4906 } else { 4907 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4908 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4909 } 4910 if (len_s[proc]) { 4911 merge->nsend++; 4912 nrows = 0; 4913 for (i=owners[proc]; i<owners[proc+1]; i++) { 4914 if (ai[i+1] > ai[i]) nrows++; 4915 } 4916 len_si[proc] = 2*(nrows+1); 4917 len += len_si[proc]; 4918 } 4919 } 4920 4921 /* determine the number and length of messages to receive for ij-structure */ 4922 /*-------------------------------------------------------------------------*/ 4923 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4924 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4925 4926 /* post the Irecv of j-structure */ 4927 /*-------------------------------*/ 4928 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4929 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4930 4931 /* post the Isend of j-structure */ 4932 /*--------------------------------*/ 4933 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4934 4935 for (proc=0, k=0; proc<size; proc++) { 4936 if (!len_s[proc]) continue; 4937 i = owners[proc]; 4938 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr); 4939 k++; 4940 } 4941 4942 /* receives and sends of j-structure are complete */ 4943 /*------------------------------------------------*/ 4944 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);} 4945 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);} 4946 4947 /* send and recv i-structure */ 4948 /*---------------------------*/ 4949 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4950 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4951 4952 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4953 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4954 for (proc=0,k=0; proc<size; proc++) { 4955 if (!len_s[proc]) continue; 4956 /* form outgoing message for i-structure: 4957 buf_si[0]: nrows to be sent 4958 [1:nrows]: row index (global) 4959 [nrows+1:2*nrows+1]: i-structure index 4960 */ 4961 /*-------------------------------------------*/ 4962 nrows = len_si[proc]/2 - 1; 4963 buf_si_i = buf_si + nrows+1; 4964 buf_si[0] = nrows; 4965 buf_si_i[0] = 0; 4966 nrows = 0; 4967 for (i=owners[proc]; i<owners[proc+1]; i++) { 4968 anzi = ai[i+1] - ai[i]; 4969 if (anzi) { 4970 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4971 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4972 nrows++; 4973 } 4974 } 4975 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr); 4976 k++; 4977 buf_si += len_si[proc]; 4978 } 4979 4980 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);} 4981 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);} 4982 4983 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4984 for (i=0; i<merge->nrecv; i++) { 4985 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4986 } 4987 4988 ierr = PetscFree(len_si);CHKERRQ(ierr); 4989 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4990 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4991 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4992 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4993 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4994 ierr = PetscFree(status);CHKERRQ(ierr); 4995 4996 /* compute a local seq matrix in each processor */ 4997 /*----------------------------------------------*/ 4998 /* allocate bi array and free space for accumulating nonzero column info */ 4999 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 5000 bi[0] = 0; 5001 5002 /* create and initialize a linked list */ 5003 nlnk = N+1; 5004 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5005 5006 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5007 len = ai[owners[rank+1]] - ai[owners[rank]]; 5008 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 5009 5010 current_space = free_space; 5011 5012 /* determine symbolic info for each local row */ 5013 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 5014 5015 for (k=0; k<merge->nrecv; k++) { 5016 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5017 nrows = *buf_ri_k[k]; 5018 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5019 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5020 } 5021 5022 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5023 len = 0; 5024 for (i=0; i<m; i++) { 5025 bnzi = 0; 5026 /* add local non-zero cols of this proc's seqmat into lnk */ 5027 arow = owners[rank] + i; 5028 anzi = ai[arow+1] - ai[arow]; 5029 aj = a->j + ai[arow]; 5030 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5031 bnzi += nlnk; 5032 /* add received col data into lnk */ 5033 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5034 if (i == *nextrow[k]) { /* i-th row */ 5035 anzi = *(nextai[k]+1) - *nextai[k]; 5036 aj = buf_rj[k] + *nextai[k]; 5037 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5038 bnzi += nlnk; 5039 nextrow[k]++; nextai[k]++; 5040 } 5041 } 5042 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5043 5044 /* if free space is not available, make more free space */ 5045 if (current_space->local_remaining<bnzi) { 5046 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 5047 nspacedouble++; 5048 } 5049 /* copy data into free space, then initialize lnk */ 5050 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5051 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5052 5053 current_space->array += bnzi; 5054 current_space->local_used += bnzi; 5055 current_space->local_remaining -= bnzi; 5056 5057 bi[i+1] = bi[i] + bnzi; 5058 } 5059 5060 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5061 5062 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5063 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5064 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5065 5066 /* create symbolic parallel matrix B_mpi */ 5067 /*---------------------------------------*/ 5068 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5069 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5070 if (n==PETSC_DECIDE) { 5071 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5072 } else { 5073 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5074 } 5075 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5076 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5077 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5078 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5079 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5080 5081 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5082 B_mpi->assembled = PETSC_FALSE; 5083 merge->bi = bi; 5084 merge->bj = bj; 5085 merge->buf_ri = buf_ri; 5086 merge->buf_rj = buf_rj; 5087 merge->coi = NULL; 5088 merge->coj = NULL; 5089 merge->owners_co = NULL; 5090 5091 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5092 5093 /* attach the supporting struct to B_mpi for reuse */ 5094 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5095 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5096 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 5097 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5098 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5099 *mpimat = B_mpi; 5100 5101 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5102 PetscFunctionReturn(0); 5103 } 5104 5105 /*@C 5106 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5107 matrices from each processor 5108 5109 Collective 5110 5111 Input Parameters: 5112 + comm - the communicators the parallel matrix will live on 5113 . seqmat - the input sequential matrices 5114 . m - number of local rows (or PETSC_DECIDE) 5115 . n - number of local columns (or PETSC_DECIDE) 5116 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5117 5118 Output Parameter: 5119 . mpimat - the parallel matrix generated 5120 5121 Level: advanced 5122 5123 Notes: 5124 The dimensions of the sequential matrix in each processor MUST be the same. 5125 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5126 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5127 @*/ 5128 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5129 { 5130 PetscErrorCode ierr; 5131 PetscMPIInt size; 5132 5133 PetscFunctionBegin; 5134 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5135 if (size == 1) { 5136 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5137 if (scall == MAT_INITIAL_MATRIX) { 5138 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5139 } else { 5140 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5141 } 5142 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5143 PetscFunctionReturn(0); 5144 } 5145 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5146 if (scall == MAT_INITIAL_MATRIX) { 5147 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5148 } 5149 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5150 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5151 PetscFunctionReturn(0); 5152 } 5153 5154 /*@ 5155 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5156 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5157 with MatGetSize() 5158 5159 Not Collective 5160 5161 Input Parameters: 5162 + A - the matrix 5163 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5164 5165 Output Parameter: 5166 . A_loc - the local sequential matrix generated 5167 5168 Level: developer 5169 5170 Notes: 5171 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5172 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5173 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5174 modify the values of the returned A_loc. 5175 5176 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 5177 @*/ 5178 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5179 { 5180 PetscErrorCode ierr; 5181 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5182 Mat_SeqAIJ *mat,*a,*b; 5183 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5184 MatScalar *aa,*ba,*cam; 5185 PetscScalar *ca; 5186 PetscMPIInt size; 5187 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5188 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5189 PetscBool match; 5190 5191 PetscFunctionBegin; 5192 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5193 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5194 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 5195 if (size == 1) { 5196 if (scall == MAT_INITIAL_MATRIX) { 5197 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5198 *A_loc = mpimat->A; 5199 } else if (scall == MAT_REUSE_MATRIX) { 5200 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5201 } 5202 PetscFunctionReturn(0); 5203 } 5204 5205 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5206 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5207 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5208 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5209 aa = a->a; ba = b->a; 5210 if (scall == MAT_INITIAL_MATRIX) { 5211 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5212 ci[0] = 0; 5213 for (i=0; i<am; i++) { 5214 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5215 } 5216 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5217 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5218 k = 0; 5219 for (i=0; i<am; i++) { 5220 ncols_o = bi[i+1] - bi[i]; 5221 ncols_d = ai[i+1] - ai[i]; 5222 /* off-diagonal portion of A */ 5223 for (jo=0; jo<ncols_o; jo++) { 5224 col = cmap[*bj]; 5225 if (col >= cstart) break; 5226 cj[k] = col; bj++; 5227 ca[k++] = *ba++; 5228 } 5229 /* diagonal portion of A */ 5230 for (j=0; j<ncols_d; j++) { 5231 cj[k] = cstart + *aj++; 5232 ca[k++] = *aa++; 5233 } 5234 /* off-diagonal portion of A */ 5235 for (j=jo; j<ncols_o; j++) { 5236 cj[k] = cmap[*bj++]; 5237 ca[k++] = *ba++; 5238 } 5239 } 5240 /* put together the new matrix */ 5241 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5242 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5243 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5244 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5245 mat->free_a = PETSC_TRUE; 5246 mat->free_ij = PETSC_TRUE; 5247 mat->nonew = 0; 5248 } else if (scall == MAT_REUSE_MATRIX) { 5249 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5250 ci = mat->i; cj = mat->j; cam = mat->a; 5251 for (i=0; i<am; i++) { 5252 /* off-diagonal portion of A */ 5253 ncols_o = bi[i+1] - bi[i]; 5254 for (jo=0; jo<ncols_o; jo++) { 5255 col = cmap[*bj]; 5256 if (col >= cstart) break; 5257 *cam++ = *ba++; bj++; 5258 } 5259 /* diagonal portion of A */ 5260 ncols_d = ai[i+1] - ai[i]; 5261 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5262 /* off-diagonal portion of A */ 5263 for (j=jo; j<ncols_o; j++) { 5264 *cam++ = *ba++; bj++; 5265 } 5266 } 5267 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5268 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5269 PetscFunctionReturn(0); 5270 } 5271 5272 /*@ 5273 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5274 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5275 5276 Not Collective 5277 5278 Input Parameters: 5279 + A - the matrix 5280 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5281 5282 Output Parameter: 5283 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5284 - A_loc - the local sequential matrix generated 5285 5286 Level: developer 5287 5288 Notes: 5289 This is different from MatMPIAIJGetLocalMat since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5290 5291 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5292 5293 @*/ 5294 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5295 { 5296 PetscErrorCode ierr; 5297 Mat Ao,Ad; 5298 const PetscInt *cmap; 5299 PetscMPIInt size; 5300 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5301 5302 PetscFunctionBegin; 5303 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr); 5304 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5305 if (size == 1) { 5306 if (scall == MAT_INITIAL_MATRIX) { 5307 ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr); 5308 *A_loc = Ad; 5309 } else if (scall == MAT_REUSE_MATRIX) { 5310 ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5311 } 5312 if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); } 5313 PetscFunctionReturn(0); 5314 } 5315 ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr); 5316 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5317 if (f) { 5318 ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr); 5319 } else { 5320 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5321 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5322 Mat_SeqAIJ *c; 5323 PetscInt *ai = a->i, *aj = a->j; 5324 PetscInt *bi = b->i, *bj = b->j; 5325 PetscInt *ci,*cj; 5326 const PetscScalar *aa,*ba; 5327 PetscScalar *ca; 5328 PetscInt i,j,am,dn,on; 5329 5330 ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr); 5331 ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr); 5332 ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr); 5333 ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr); 5334 if (scall == MAT_INITIAL_MATRIX) { 5335 PetscInt k; 5336 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5337 ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr); 5338 ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr); 5339 ci[0] = 0; 5340 for (i=0,k=0; i<am; i++) { 5341 const PetscInt ncols_o = bi[i+1] - bi[i]; 5342 const PetscInt ncols_d = ai[i+1] - ai[i]; 5343 ci[i+1] = ci[i] + ncols_o + ncols_d; 5344 /* diagonal portion of A */ 5345 for (j=0; j<ncols_d; j++,k++) { 5346 cj[k] = *aj++; 5347 ca[k] = *aa++; 5348 } 5349 /* off-diagonal portion of A */ 5350 for (j=0; j<ncols_o; j++,k++) { 5351 cj[k] = dn + *bj++; 5352 ca[k] = *ba++; 5353 } 5354 } 5355 /* put together the new matrix */ 5356 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr); 5357 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5358 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5359 c = (Mat_SeqAIJ*)(*A_loc)->data; 5360 c->free_a = PETSC_TRUE; 5361 c->free_ij = PETSC_TRUE; 5362 c->nonew = 0; 5363 ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr); 5364 } else if (scall == MAT_REUSE_MATRIX) { 5365 #if defined(PETSC_HAVE_DEVICE) 5366 (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU; 5367 #endif 5368 c = (Mat_SeqAIJ*)(*A_loc)->data; 5369 ca = c->a; 5370 for (i=0; i<am; i++) { 5371 const PetscInt ncols_d = ai[i+1] - ai[i]; 5372 const PetscInt ncols_o = bi[i+1] - bi[i]; 5373 /* diagonal portion of A */ 5374 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5375 /* off-diagonal portion of A */ 5376 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5377 } 5378 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5379 ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr); 5380 ierr = MatSeqAIJRestoreArrayRead(Ao,&aa);CHKERRQ(ierr); 5381 if (glob) { 5382 PetscInt cst, *gidx; 5383 5384 ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr); 5385 ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr); 5386 for (i=0; i<dn; i++) gidx[i] = cst + i; 5387 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5388 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr); 5389 } 5390 } 5391 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5392 PetscFunctionReturn(0); 5393 } 5394 5395 /*@C 5396 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5397 5398 Not Collective 5399 5400 Input Parameters: 5401 + A - the matrix 5402 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5403 - row, col - index sets of rows and columns to extract (or NULL) 5404 5405 Output Parameter: 5406 . A_loc - the local sequential matrix generated 5407 5408 Level: developer 5409 5410 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5411 5412 @*/ 5413 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5414 { 5415 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5416 PetscErrorCode ierr; 5417 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5418 IS isrowa,iscola; 5419 Mat *aloc; 5420 PetscBool match; 5421 5422 PetscFunctionBegin; 5423 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5424 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5425 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5426 if (!row) { 5427 start = A->rmap->rstart; end = A->rmap->rend; 5428 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5429 } else { 5430 isrowa = *row; 5431 } 5432 if (!col) { 5433 start = A->cmap->rstart; 5434 cmap = a->garray; 5435 nzA = a->A->cmap->n; 5436 nzB = a->B->cmap->n; 5437 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5438 ncols = 0; 5439 for (i=0; i<nzB; i++) { 5440 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5441 else break; 5442 } 5443 imark = i; 5444 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5445 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5446 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5447 } else { 5448 iscola = *col; 5449 } 5450 if (scall != MAT_INITIAL_MATRIX) { 5451 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5452 aloc[0] = *A_loc; 5453 } 5454 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5455 if (!col) { /* attach global id of condensed columns */ 5456 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5457 } 5458 *A_loc = aloc[0]; 5459 ierr = PetscFree(aloc);CHKERRQ(ierr); 5460 if (!row) { 5461 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5462 } 5463 if (!col) { 5464 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5465 } 5466 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5467 PetscFunctionReturn(0); 5468 } 5469 5470 /* 5471 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5472 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5473 * on a global size. 5474 * */ 5475 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5476 { 5477 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5478 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5479 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5480 PetscMPIInt owner; 5481 PetscSFNode *iremote,*oiremote; 5482 const PetscInt *lrowindices; 5483 PetscErrorCode ierr; 5484 PetscSF sf,osf; 5485 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5486 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5487 MPI_Comm comm; 5488 ISLocalToGlobalMapping mapping; 5489 5490 PetscFunctionBegin; 5491 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5492 /* plocalsize is the number of roots 5493 * nrows is the number of leaves 5494 * */ 5495 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5496 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5497 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5498 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5499 for (i=0;i<nrows;i++) { 5500 /* Find a remote index and an owner for a row 5501 * The row could be local or remote 5502 * */ 5503 owner = 0; 5504 lidx = 0; 5505 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5506 iremote[i].index = lidx; 5507 iremote[i].rank = owner; 5508 } 5509 /* Create SF to communicate how many nonzero columns for each row */ 5510 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5511 /* SF will figure out the number of nonzero colunms for each row, and their 5512 * offsets 5513 * */ 5514 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5515 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5516 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5517 5518 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5519 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5520 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5521 roffsets[0] = 0; 5522 roffsets[1] = 0; 5523 for (i=0;i<plocalsize;i++) { 5524 /* diag */ 5525 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5526 /* off diag */ 5527 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5528 /* compute offsets so that we relative location for each row */ 5529 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5530 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5531 } 5532 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5533 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5534 /* 'r' means root, and 'l' means leaf */ 5535 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5536 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5537 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5538 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5539 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5540 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5541 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5542 dntotalcols = 0; 5543 ontotalcols = 0; 5544 ncol = 0; 5545 for (i=0;i<nrows;i++) { 5546 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5547 ncol = PetscMax(pnnz[i],ncol); 5548 /* diag */ 5549 dntotalcols += nlcols[i*2+0]; 5550 /* off diag */ 5551 ontotalcols += nlcols[i*2+1]; 5552 } 5553 /* We do not need to figure the right number of columns 5554 * since all the calculations will be done by going through the raw data 5555 * */ 5556 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5557 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5558 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5559 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5560 /* diag */ 5561 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5562 /* off diag */ 5563 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5564 /* diag */ 5565 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5566 /* off diag */ 5567 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5568 dntotalcols = 0; 5569 ontotalcols = 0; 5570 ntotalcols = 0; 5571 for (i=0;i<nrows;i++) { 5572 owner = 0; 5573 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5574 /* Set iremote for diag matrix */ 5575 for (j=0;j<nlcols[i*2+0];j++) { 5576 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5577 iremote[dntotalcols].rank = owner; 5578 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5579 ilocal[dntotalcols++] = ntotalcols++; 5580 } 5581 /* off diag */ 5582 for (j=0;j<nlcols[i*2+1];j++) { 5583 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5584 oiremote[ontotalcols].rank = owner; 5585 oilocal[ontotalcols++] = ntotalcols++; 5586 } 5587 } 5588 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5589 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5590 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5591 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5592 /* P serves as roots and P_oth is leaves 5593 * Diag matrix 5594 * */ 5595 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5596 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5597 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5598 5599 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5600 /* Off diag */ 5601 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5602 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5603 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5604 /* We operate on the matrix internal data for saving memory */ 5605 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5606 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5607 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5608 /* Convert to global indices for diag matrix */ 5609 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5610 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5611 /* We want P_oth store global indices */ 5612 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5613 /* Use memory scalable approach */ 5614 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5615 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5616 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5617 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5618 /* Convert back to local indices */ 5619 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5620 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5621 nout = 0; 5622 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5623 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5624 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5625 /* Exchange values */ 5626 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5627 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5628 /* Stop PETSc from shrinking memory */ 5629 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5630 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5633 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5634 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5635 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5636 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5637 PetscFunctionReturn(0); 5638 } 5639 5640 /* 5641 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5642 * This supports MPIAIJ and MAIJ 5643 * */ 5644 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5645 { 5646 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5647 Mat_SeqAIJ *p_oth; 5648 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5649 IS rows,map; 5650 PetscHMapI hamp; 5651 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5652 MPI_Comm comm; 5653 PetscSF sf,osf; 5654 PetscBool has; 5655 PetscErrorCode ierr; 5656 5657 PetscFunctionBegin; 5658 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5659 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5660 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5661 * and then create a submatrix (that often is an overlapping matrix) 5662 * */ 5663 if (reuse == MAT_INITIAL_MATRIX) { 5664 /* Use a hash table to figure out unique keys */ 5665 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5666 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5667 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5668 count = 0; 5669 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5670 for (i=0;i<a->B->cmap->n;i++) { 5671 key = a->garray[i]/dof; 5672 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5673 if (!has) { 5674 mapping[i] = count; 5675 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5676 } else { 5677 /* Current 'i' has the same value the previous step */ 5678 mapping[i] = count-1; 5679 } 5680 } 5681 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5682 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5683 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5684 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5685 off = 0; 5686 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5687 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5688 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5689 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5690 /* In case, the matrix was already created but users want to recreate the matrix */ 5691 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5692 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5693 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5694 ierr = ISDestroy(&map);CHKERRQ(ierr); 5695 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5696 } else if (reuse == MAT_REUSE_MATRIX) { 5697 /* If matrix was already created, we simply update values using SF objects 5698 * that as attached to the matrix ealier. 5699 * */ 5700 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5701 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5702 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5703 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5704 /* Update values in place */ 5705 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5706 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5707 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5708 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5709 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5710 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5711 PetscFunctionReturn(0); 5712 } 5713 5714 /*@C 5715 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5716 5717 Collective on Mat 5718 5719 Input Parameters: 5720 + A,B - the matrices in mpiaij format 5721 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5722 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5723 5724 Output Parameter: 5725 + rowb, colb - index sets of rows and columns of B to extract 5726 - B_seq - the sequential matrix generated 5727 5728 Level: developer 5729 5730 @*/ 5731 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5732 { 5733 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5734 PetscErrorCode ierr; 5735 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5736 IS isrowb,iscolb; 5737 Mat *bseq=NULL; 5738 5739 PetscFunctionBegin; 5740 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5741 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5742 } 5743 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5744 5745 if (scall == MAT_INITIAL_MATRIX) { 5746 start = A->cmap->rstart; 5747 cmap = a->garray; 5748 nzA = a->A->cmap->n; 5749 nzB = a->B->cmap->n; 5750 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5751 ncols = 0; 5752 for (i=0; i<nzB; i++) { /* row < local row index */ 5753 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5754 else break; 5755 } 5756 imark = i; 5757 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5758 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5759 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5760 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5761 } else { 5762 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5763 isrowb = *rowb; iscolb = *colb; 5764 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5765 bseq[0] = *B_seq; 5766 } 5767 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5768 *B_seq = bseq[0]; 5769 ierr = PetscFree(bseq);CHKERRQ(ierr); 5770 if (!rowb) { 5771 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5772 } else { 5773 *rowb = isrowb; 5774 } 5775 if (!colb) { 5776 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5777 } else { 5778 *colb = iscolb; 5779 } 5780 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5781 PetscFunctionReturn(0); 5782 } 5783 5784 /* 5785 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5786 of the OFF-DIAGONAL portion of local A 5787 5788 Collective on Mat 5789 5790 Input Parameters: 5791 + A,B - the matrices in mpiaij format 5792 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5793 5794 Output Parameter: 5795 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5796 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5797 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5798 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5799 5800 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5801 for this matrix. This is not desirable.. 5802 5803 Level: developer 5804 5805 */ 5806 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5807 { 5808 PetscErrorCode ierr; 5809 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5810 Mat_SeqAIJ *b_oth; 5811 VecScatter ctx; 5812 MPI_Comm comm; 5813 const PetscMPIInt *rprocs,*sprocs; 5814 const PetscInt *srow,*rstarts,*sstarts; 5815 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5816 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5817 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5818 MPI_Request *rwaits = NULL,*swaits = NULL; 5819 MPI_Status rstatus; 5820 PetscMPIInt size,tag,rank,nsends_mpi,nrecvs_mpi; 5821 PETSC_UNUSED PetscMPIInt jj; 5822 5823 PetscFunctionBegin; 5824 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5825 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5826 5827 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5828 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5829 } 5830 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5831 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 5832 5833 if (size == 1) { 5834 startsj_s = NULL; 5835 bufa_ptr = NULL; 5836 *B_oth = NULL; 5837 PetscFunctionReturn(0); 5838 } 5839 5840 ctx = a->Mvctx; 5841 tag = ((PetscObject)ctx)->tag; 5842 5843 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5844 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5845 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5846 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5847 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5848 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5849 5850 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5851 if (scall == MAT_INITIAL_MATRIX) { 5852 /* i-array */ 5853 /*---------*/ 5854 /* post receives */ 5855 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5856 for (i=0; i<nrecvs; i++) { 5857 rowlen = rvalues + rstarts[i]*rbs; 5858 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5859 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5860 } 5861 5862 /* pack the outgoing message */ 5863 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5864 5865 sstartsj[0] = 0; 5866 rstartsj[0] = 0; 5867 len = 0; /* total length of j or a array to be sent */ 5868 if (nsends) { 5869 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5870 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5871 } 5872 for (i=0; i<nsends; i++) { 5873 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5874 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5875 for (j=0; j<nrows; j++) { 5876 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5877 for (l=0; l<sbs; l++) { 5878 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5879 5880 rowlen[j*sbs+l] = ncols; 5881 5882 len += ncols; 5883 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5884 } 5885 k++; 5886 } 5887 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5888 5889 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5890 } 5891 /* recvs and sends of i-array are completed */ 5892 i = nrecvs; 5893 while (i--) { 5894 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5895 } 5896 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5897 ierr = PetscFree(svalues);CHKERRQ(ierr); 5898 5899 /* allocate buffers for sending j and a arrays */ 5900 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5901 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5902 5903 /* create i-array of B_oth */ 5904 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5905 5906 b_othi[0] = 0; 5907 len = 0; /* total length of j or a array to be received */ 5908 k = 0; 5909 for (i=0; i<nrecvs; i++) { 5910 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5911 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5912 for (j=0; j<nrows; j++) { 5913 b_othi[k+1] = b_othi[k] + rowlen[j]; 5914 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5915 k++; 5916 } 5917 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5918 } 5919 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5920 5921 /* allocate space for j and a arrrays of B_oth */ 5922 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5923 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5924 5925 /* j-array */ 5926 /*---------*/ 5927 /* post receives of j-array */ 5928 for (i=0; i<nrecvs; i++) { 5929 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5930 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5931 } 5932 5933 /* pack the outgoing message j-array */ 5934 if (nsends) k = sstarts[0]; 5935 for (i=0; i<nsends; i++) { 5936 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5937 bufJ = bufj+sstartsj[i]; 5938 for (j=0; j<nrows; j++) { 5939 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5940 for (ll=0; ll<sbs; ll++) { 5941 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5942 for (l=0; l<ncols; l++) { 5943 *bufJ++ = cols[l]; 5944 } 5945 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5946 } 5947 } 5948 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5949 } 5950 5951 /* recvs and sends of j-array are completed */ 5952 i = nrecvs; 5953 while (i--) { 5954 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5955 } 5956 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5957 } else if (scall == MAT_REUSE_MATRIX) { 5958 sstartsj = *startsj_s; 5959 rstartsj = *startsj_r; 5960 bufa = *bufa_ptr; 5961 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5962 b_otha = b_oth->a; 5963 #if defined(PETSC_HAVE_DEVICE) 5964 (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU; 5965 #endif 5966 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5967 5968 /* a-array */ 5969 /*---------*/ 5970 /* post receives of a-array */ 5971 for (i=0; i<nrecvs; i++) { 5972 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5973 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5974 } 5975 5976 /* pack the outgoing message a-array */ 5977 if (nsends) k = sstarts[0]; 5978 for (i=0; i<nsends; i++) { 5979 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5980 bufA = bufa+sstartsj[i]; 5981 for (j=0; j<nrows; j++) { 5982 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5983 for (ll=0; ll<sbs; ll++) { 5984 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5985 for (l=0; l<ncols; l++) { 5986 *bufA++ = vals[l]; 5987 } 5988 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5989 } 5990 } 5991 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr); 5992 } 5993 /* recvs and sends of a-array are completed */ 5994 i = nrecvs; 5995 while (i--) { 5996 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr); 5997 } 5998 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);} 5999 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 6000 6001 if (scall == MAT_INITIAL_MATRIX) { 6002 /* put together the new matrix */ 6003 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 6004 6005 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6006 /* Since these are PETSc arrays, change flags to free them as necessary. */ 6007 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 6008 b_oth->free_a = PETSC_TRUE; 6009 b_oth->free_ij = PETSC_TRUE; 6010 b_oth->nonew = 0; 6011 6012 ierr = PetscFree(bufj);CHKERRQ(ierr); 6013 if (!startsj_s || !bufa_ptr) { 6014 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 6015 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 6016 } else { 6017 *startsj_s = sstartsj; 6018 *startsj_r = rstartsj; 6019 *bufa_ptr = bufa; 6020 } 6021 } 6022 6023 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 6024 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 6025 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 6026 PetscFunctionReturn(0); 6027 } 6028 6029 /*@C 6030 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 6031 6032 Not Collective 6033 6034 Input Parameters: 6035 . A - The matrix in mpiaij format 6036 6037 Output Parameter: 6038 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 6039 . colmap - A map from global column index to local index into lvec 6040 - multScatter - A scatter from the argument of a matrix-vector product to lvec 6041 6042 Level: developer 6043 6044 @*/ 6045 #if defined(PETSC_USE_CTABLE) 6046 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 6047 #else 6048 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 6049 #endif 6050 { 6051 Mat_MPIAIJ *a; 6052 6053 PetscFunctionBegin; 6054 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 6055 PetscValidPointer(lvec, 2); 6056 PetscValidPointer(colmap, 3); 6057 PetscValidPointer(multScatter, 4); 6058 a = (Mat_MPIAIJ*) A->data; 6059 if (lvec) *lvec = a->lvec; 6060 if (colmap) *colmap = a->colmap; 6061 if (multScatter) *multScatter = a->Mvctx; 6062 PetscFunctionReturn(0); 6063 } 6064 6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 6068 #if defined(PETSC_HAVE_MKL_SPARSE) 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 6070 #endif 6071 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 6073 #if defined(PETSC_HAVE_ELEMENTAL) 6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 6075 #endif 6076 #if defined(PETSC_HAVE_SCALAPACK) 6077 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 6078 #endif 6079 #if defined(PETSC_HAVE_HYPRE) 6080 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 6081 #endif 6082 #if defined(PETSC_HAVE_CUDA) 6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 6084 #endif 6085 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6086 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 6087 #endif 6088 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 6089 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 6090 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6091 6092 /* 6093 Computes (B'*A')' since computing B*A directly is untenable 6094 6095 n p p 6096 [ ] [ ] [ ] 6097 m [ A ] * n [ B ] = m [ C ] 6098 [ ] [ ] [ ] 6099 6100 */ 6101 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 6102 { 6103 PetscErrorCode ierr; 6104 Mat At,Bt,Ct; 6105 6106 PetscFunctionBegin; 6107 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 6108 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 6109 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 6110 ierr = MatDestroy(&At);CHKERRQ(ierr); 6111 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 6112 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 6113 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 6114 PetscFunctionReturn(0); 6115 } 6116 6117 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6118 { 6119 PetscErrorCode ierr; 6120 PetscBool cisdense; 6121 6122 PetscFunctionBegin; 6123 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 6124 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 6125 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 6126 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 6127 if (!cisdense) { 6128 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 6129 } 6130 ierr = MatSetUp(C);CHKERRQ(ierr); 6131 6132 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6133 PetscFunctionReturn(0); 6134 } 6135 6136 /* ----------------------------------------------------------------*/ 6137 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6138 { 6139 Mat_Product *product = C->product; 6140 Mat A = product->A,B=product->B; 6141 6142 PetscFunctionBegin; 6143 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6144 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6145 6146 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6147 C->ops->productsymbolic = MatProductSymbolic_AB; 6148 PetscFunctionReturn(0); 6149 } 6150 6151 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6152 { 6153 PetscErrorCode ierr; 6154 Mat_Product *product = C->product; 6155 6156 PetscFunctionBegin; 6157 if (product->type == MATPRODUCT_AB) { 6158 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6159 } 6160 PetscFunctionReturn(0); 6161 } 6162 /* ----------------------------------------------------------------*/ 6163 6164 /*MC 6165 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6166 6167 Options Database Keys: 6168 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6169 6170 Level: beginner 6171 6172 Notes: 6173 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6174 in this case the values associated with the rows and columns one passes in are set to zero 6175 in the matrix 6176 6177 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 6178 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6179 6180 .seealso: MatCreateAIJ() 6181 M*/ 6182 6183 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6184 { 6185 Mat_MPIAIJ *b; 6186 PetscErrorCode ierr; 6187 PetscMPIInt size; 6188 6189 PetscFunctionBegin; 6190 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 6191 6192 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6193 B->data = (void*)b; 6194 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6195 B->assembled = PETSC_FALSE; 6196 B->insertmode = NOT_SET_VALUES; 6197 b->size = size; 6198 6199 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 6200 6201 /* build cache for off array entries formed */ 6202 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6203 6204 b->donotstash = PETSC_FALSE; 6205 b->colmap = NULL; 6206 b->garray = NULL; 6207 b->roworiented = PETSC_TRUE; 6208 6209 /* stuff used for matrix vector multiply */ 6210 b->lvec = NULL; 6211 b->Mvctx = NULL; 6212 6213 /* stuff for MatGetRow() */ 6214 b->rowindices = NULL; 6215 b->rowvalues = NULL; 6216 b->getrowactive = PETSC_FALSE; 6217 6218 /* flexible pointer used in CUSPARSE classes */ 6219 b->spptr = NULL; 6220 6221 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6222 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6223 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6224 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6225 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6226 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6227 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6228 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6229 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6230 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6231 #if defined(PETSC_HAVE_CUDA) 6232 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr); 6233 #endif 6234 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6235 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr); 6236 #endif 6237 #if defined(PETSC_HAVE_MKL_SPARSE) 6238 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6239 #endif 6240 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6241 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6242 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6243 #if defined(PETSC_HAVE_ELEMENTAL) 6244 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6245 #endif 6246 #if defined(PETSC_HAVE_SCALAPACK) 6247 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6248 #endif 6249 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6250 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6251 #if defined(PETSC_HAVE_HYPRE) 6252 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6253 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6254 #endif 6255 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6256 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6257 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6258 PetscFunctionReturn(0); 6259 } 6260 6261 /*@C 6262 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6263 and "off-diagonal" part of the matrix in CSR format. 6264 6265 Collective 6266 6267 Input Parameters: 6268 + comm - MPI communicator 6269 . m - number of local rows (Cannot be PETSC_DECIDE) 6270 . n - This value should be the same as the local size used in creating the 6271 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6272 calculated if N is given) For square matrices n is almost always m. 6273 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6274 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6275 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6276 . j - column indices 6277 . a - matrix values 6278 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6279 . oj - column indices 6280 - oa - matrix values 6281 6282 Output Parameter: 6283 . mat - the matrix 6284 6285 Level: advanced 6286 6287 Notes: 6288 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6289 must free the arrays once the matrix has been destroyed and not before. 6290 6291 The i and j indices are 0 based 6292 6293 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6294 6295 This sets local rows and cannot be used to set off-processor values. 6296 6297 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6298 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6299 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6300 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6301 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6302 communication if it is known that only local entries will be set. 6303 6304 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6305 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6306 @*/ 6307 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6308 { 6309 PetscErrorCode ierr; 6310 Mat_MPIAIJ *maij; 6311 6312 PetscFunctionBegin; 6313 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6314 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6315 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6316 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6317 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6318 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6319 maij = (Mat_MPIAIJ*) (*mat)->data; 6320 6321 (*mat)->preallocated = PETSC_TRUE; 6322 6323 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6324 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6325 6326 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6327 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6328 6329 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6330 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6331 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6332 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6333 6334 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6335 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6336 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6337 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6338 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6339 PetscFunctionReturn(0); 6340 } 6341 6342 /* 6343 Special version for direct calls from Fortran 6344 */ 6345 #include <petsc/private/fortranimpl.h> 6346 6347 /* Change these macros so can be used in void function */ 6348 #undef CHKERRQ 6349 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6350 #undef SETERRQ2 6351 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6352 #undef SETERRQ3 6353 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6354 #undef SETERRQ 6355 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6356 6357 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6358 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6359 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6360 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6361 #else 6362 #endif 6363 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6364 { 6365 Mat mat = *mmat; 6366 PetscInt m = *mm, n = *mn; 6367 InsertMode addv = *maddv; 6368 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6369 PetscScalar value; 6370 PetscErrorCode ierr; 6371 6372 MatCheckPreallocated(mat,1); 6373 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6374 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6375 { 6376 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6377 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6378 PetscBool roworiented = aij->roworiented; 6379 6380 /* Some Variables required in the macro */ 6381 Mat A = aij->A; 6382 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6383 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6384 MatScalar *aa = a->a; 6385 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6386 Mat B = aij->B; 6387 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6388 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6389 MatScalar *ba = b->a; 6390 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6391 * cannot use "#if defined" inside a macro. */ 6392 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6393 6394 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6395 PetscInt nonew = a->nonew; 6396 MatScalar *ap1,*ap2; 6397 6398 PetscFunctionBegin; 6399 for (i=0; i<m; i++) { 6400 if (im[i] < 0) continue; 6401 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6402 if (im[i] >= rstart && im[i] < rend) { 6403 row = im[i] - rstart; 6404 lastcol1 = -1; 6405 rp1 = aj + ai[row]; 6406 ap1 = aa + ai[row]; 6407 rmax1 = aimax[row]; 6408 nrow1 = ailen[row]; 6409 low1 = 0; 6410 high1 = nrow1; 6411 lastcol2 = -1; 6412 rp2 = bj + bi[row]; 6413 ap2 = ba + bi[row]; 6414 rmax2 = bimax[row]; 6415 nrow2 = bilen[row]; 6416 low2 = 0; 6417 high2 = nrow2; 6418 6419 for (j=0; j<n; j++) { 6420 if (roworiented) value = v[i*n+j]; 6421 else value = v[i+j*m]; 6422 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6423 if (in[j] >= cstart && in[j] < cend) { 6424 col = in[j] - cstart; 6425 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6426 #if defined(PETSC_HAVE_DEVICE) 6427 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6428 #endif 6429 } else if (in[j] < 0) continue; 6430 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6431 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6432 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6433 } else { 6434 if (mat->was_assembled) { 6435 if (!aij->colmap) { 6436 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6437 } 6438 #if defined(PETSC_USE_CTABLE) 6439 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6440 col--; 6441 #else 6442 col = aij->colmap[in[j]] - 1; 6443 #endif 6444 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6445 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6446 col = in[j]; 6447 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6448 B = aij->B; 6449 b = (Mat_SeqAIJ*)B->data; 6450 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6451 rp2 = bj + bi[row]; 6452 ap2 = ba + bi[row]; 6453 rmax2 = bimax[row]; 6454 nrow2 = bilen[row]; 6455 low2 = 0; 6456 high2 = nrow2; 6457 bm = aij->B->rmap->n; 6458 ba = b->a; 6459 inserted = PETSC_FALSE; 6460 } 6461 } else col = in[j]; 6462 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6463 #if defined(PETSC_HAVE_DEVICE) 6464 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6465 #endif 6466 } 6467 } 6468 } else if (!aij->donotstash) { 6469 if (roworiented) { 6470 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6471 } else { 6472 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6473 } 6474 } 6475 } 6476 } 6477 PetscFunctionReturnVoid(); 6478 } 6479