#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to using inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
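/*
   Example (added sketch, not part of the original source): the docstrings above recommend calling
   both preallocation routines so the same code works for any communicator size. A typical caller
   might look like the following; A, comm, m, n, M, N, d_nz and o_nz are placeholder names chosen
   here for illustration.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);           used when comm has one process
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr); used when comm has several processes
*/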
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
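/*
   Example (added sketch, not part of the original source): the routine above is normally reached
   through the public MatGetColumnNorms() interface. The output array must have global column
   length on every rank, since the result is completed with an Allreduce; A and norms below are
   placeholder names.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/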
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
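/*
   Example (added sketch, not part of the original source): a typical call sequence for the
   PETSC_EXTERN routine above, where rank 0 supplies the sequential matrix to be spread over comm
   and each rank passes the number of rows it should own; seqmat, mlocal and dmat are placeholder
   names.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,seqmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,seqmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/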
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2)  low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        inserted = PETSC_TRUE; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_DEVICE)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
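/*
   Note (added sketch, not part of the original source): MatSetValues_MPIAIJ() above stashes
   entries whose rows are owned by other ranks; they only reach their owners during assembly, so a
   caller always pairs value insertion with the assembly begin/end calls. grow, gcol and val are
   placeholder names for a global row index, global column index, and value.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/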
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_DEVICE)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
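/*
   Example (added sketch, not part of the original source): the row-zeroing routine above is what
   MatZeroRows() dispatches to for MPIAIJ matrices. A common use is enforcing Dirichlet boundary
   conditions by zeroing the boundary rows, placing 1.0 on the diagonal and fixing the right-hand
   side from a vector of boundary values; nbc, bcrows, u and rhs are placeholder names.

     ierr = MatZeroRows(A,nbc,bcrows,1.0,u,rhs);CHKERRQ(ierr);
*/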
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
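/*
   Note (added sketch, not part of the original source): the multiply kernels above all follow the
   same overlap pattern, written out here for a hypothetical product y = A*x with an MPIAIJ matrix
   whose data pointer is "a"; starting the scatter before the diagonal-block multiply lets the
   ghost-value communication proceed while local work is done.

     ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);   start ghost update
     ierr = MatMult(a->A,xx,yy);CHKERRQ(ierr);                                                  local (diagonal) block
     ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);     finish ghost update
     ierr = MatMultAdd(a->B,a->lvec,yy,yy);CHKERRQ(ierr);                                       off-diagonal block
*/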
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt       *rowlens;
  PetscInt       *colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = B->a[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = A->a[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = B->a[jb];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
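/*
   Example (added sketch, not part of the original source): the binary path above runs when a
   parallel AIJ matrix is dumped to disk through a binary viewer; "matrix.dat" is a placeholder
   file name.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The companion "matrix.dat.info" file receives the block size written by
   MatView_Binary_BlockSizes() at the end of the routine above.
*/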
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1456 } else { 1457 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1458 } 1459 PetscFunctionReturn(0); 1460 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1461 PetscFunctionReturn(0); 1462 } 1463 } else if (isbinary) { 1464 if (size == 1) { 1465 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1466 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1467 } else { 1468 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } else if (iascii && size == 1) { 1472 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1473 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } else if (isdraw) { 1476 PetscDraw draw; 1477 PetscBool isnull; 1478 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1479 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1480 if (isnull) PetscFunctionReturn(0); 1481 } 1482 1483 { /* assemble the entire matrix onto first processor */ 1484 Mat A = NULL, Av; 1485 IS isrow,iscol; 1486 1487 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1488 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1489 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1490 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1491 /* The commented code uses MatCreateSubMatrices instead */ 1492 /* 1493 Mat *AA, A = NULL, Av; 1494 IS isrow,iscol; 1495 1496 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1497 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1498 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1499 if (!rank) { 1500 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1501 A = AA[0]; 1502 Av = AA[0]; 1503 } 1504 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1505 */ 1506 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1507 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1508 /* 1509 Everyone has to call to draw the matrix since the graphics waits are 1510 synchronized across all processors that share the PetscDraw object 1511 */ 1512 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1513 if (!rank) { 1514 if (((PetscObject)mat)->name) { 1515 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1516 } 1517 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1518 } 1519 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1520 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1521 ierr = MatDestroy(&A);CHKERRQ(ierr); 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1527 { 1528 PetscErrorCode ierr; 1529 PetscBool iascii,isdraw,issocket,isbinary; 1530 1531 PetscFunctionBegin; 1532 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1533 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1534 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1535 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1536 if (iascii || isdraw || isbinary || issocket) { 1537 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1538 } 1539 PetscFunctionReturn(0); 1540 } 1541 1542 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1543 { 1544 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1545 PetscErrorCode ierr; 1546 Vec bb1 = NULL; 1547 PetscBool hasop; 1548 1549 PetscFunctionBegin; 1550 if (flag == SOR_APPLY_UPPER) { 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1556 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1557 } 1558 1559 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1560 if (flag & SOR_ZERO_INITIAL_GUESS) { 1561 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1562 its--; 1563 } 1564 1565 while (its--) { 1566 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 1569 /* update rhs: bb1 = bb - B*x */ 1570 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1571 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1572 1573 /* local sweep */ 1574 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1575 } 1576 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1577 if (flag & SOR_ZERO_INITIAL_GUESS) { 1578 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1579 its--; 1580 } 1581 while (its--) { 1582 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 1585 /* update rhs: bb1 = bb - B*x */ 1586 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1587 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1588 1589 /* local sweep */ 1590 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1591 } 1592 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1593 if (flag & SOR_ZERO_INITIAL_GUESS) { 1594 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1595 its--; 1596 } 1597 while (its--) { 1598 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 1601 /* update rhs: bb1 = bb - B*x */ 1602 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1603 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1604 1605 /* local sweep */ 1606 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1607 } 1608 } else if (flag & SOR_EISENSTAT) { 1609 Vec xx1; 1610 1611 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1613 1614 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1615 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1616 if (!mat->diag) { 1617 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1618 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1619 } 1620 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1621 if (hasop) { 1622 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1623 } else { 1624 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1625 } 1626 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1627 1628 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1629 1630 /* local sweep */ 1631 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1632 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1633 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1634 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1635 1636 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1637 1638 matin->factorerrortype = mat->A->factorerrortype; 1639 PetscFunctionReturn(0); 1640 } 1641 1642 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1643 { 1644 Mat aA,aB,Aperm; 1645 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1646 PetscScalar *aa,*ba; 1647 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1648 PetscSF rowsf,sf; 1649 IS parcolp = NULL; 1650 PetscBool done; 1651 PetscErrorCode ierr; 1652 1653 PetscFunctionBegin; 1654 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1655 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1656 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1657 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1658 1659 /* Invert row permutation to find out where my rows should go */ 1660 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1661 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1662 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 
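  /* The PetscSF reduce below inverts the row permutation without an explicit all-to-all:
     leaf i of rowsf points at global row rwant[i], the row of A requested for global
     position A->rmap->rstart+i.  Reducing work[i] = A->rmap->rstart+i with MPIU_REPLACE
     therefore stores at each old row the new global row it moves to, i.e.
     rdest[old local row] = new global row.  Illustrative data only: on one process with
     rwant = {2,0,1} the reduce yields rdest = {1,2,0}. */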
1663 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1664 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1665 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1666 1667 /* Invert column permutation to find out where my columns should go */ 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1672 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1673 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1674 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1675 1676 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1677 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1678 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1679 1680 /* Find out where my gcols should go */ 1681 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1682 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1683 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1684 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1685 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1686 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1687 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1688 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1689 1690 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1691 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1692 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1693 for (i=0; i<m; i++) { 1694 PetscInt row = rdest[i]; 1695 PetscMPIInt rowner; 1696 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1697 for (j=ai[i]; j<ai[i+1]; j++) { 1698 PetscInt col = cdest[aj[j]]; 1699 PetscMPIInt cowner; 1700 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1701 if (rowner == cowner) dnnz[i]++; 1702 else onnz[i]++; 1703 } 1704 for (j=bi[i]; j<bi[i+1]; j++) { 1705 PetscInt col = gcdest[bj[j]]; 1706 PetscMPIInt cowner; 1707 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1708 if (rowner == cowner) dnnz[i]++; 1709 else onnz[i]++; 1710 } 1711 } 1712 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1713 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1714 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1715 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1716 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1717 1718 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1719 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1720 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1721 for (i=0; i<m; i++) { 1722 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1723 PetscInt j0,rowlen; 1724 rowlen = ai[i+1] - ai[i]; 1725 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1726 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1727 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1728 } 1729 rowlen = bi[i+1] - 
bi[i]; 1730 for (j0=j=0; j<rowlen; j0=j) { 1731 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1732 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1733 } 1734 } 1735 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1736 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1737 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1738 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1739 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1740 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1741 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1742 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1743 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1744 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1745 *B = Aperm; 1746 PetscFunctionReturn(0); 1747 } 1748 1749 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1750 { 1751 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1756 if (ghosts) *ghosts = aij->garray; 1757 PetscFunctionReturn(0); 1758 } 1759 1760 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1761 { 1762 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1763 Mat A = mat->A,B = mat->B; 1764 PetscErrorCode ierr; 1765 PetscLogDouble isend[5],irecv[5]; 1766 1767 PetscFunctionBegin; 1768 info->block_size = 1.0; 1769 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1770 1771 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1772 isend[3] = info->memory; isend[4] = info->mallocs; 1773 1774 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1775 1776 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1777 isend[3] += info->memory; isend[4] += info->mallocs; 1778 if (flag == MAT_LOCAL) { 1779 info->nz_used = isend[0]; 1780 info->nz_allocated = isend[1]; 1781 info->nz_unneeded = isend[2]; 1782 info->memory = isend[3]; 1783 info->mallocs = isend[4]; 1784 } else if (flag == MAT_GLOBAL_MAX) { 1785 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1786 1787 info->nz_used = irecv[0]; 1788 info->nz_allocated = irecv[1]; 1789 info->nz_unneeded = irecv[2]; 1790 info->memory = irecv[3]; 1791 info->mallocs = irecv[4]; 1792 } else if (flag == MAT_GLOBAL_SUM) { 1793 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1794 1795 info->nz_used = irecv[0]; 1796 info->nz_allocated = irecv[1]; 1797 info->nz_unneeded = irecv[2]; 1798 info->memory = irecv[3]; 1799 info->mallocs = irecv[4]; 1800 } 1801 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1802 info->fill_ratio_needed = 0; 1803 info->factor_mallocs = 0; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1808 { 1809 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1810 PetscErrorCode ierr; 1811 1812 PetscFunctionBegin; 1813 switch (op) { 1814 case MAT_NEW_NONZERO_LOCATIONS: 1815 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1816 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1817 case MAT_KEEP_NONZERO_PATTERN: 1818 case MAT_NEW_NONZERO_LOCATION_ERR: 1819 case MAT_USE_INODES: 1820 case MAT_IGNORE_ZERO_ENTRIES: 1821 MatCheckPreallocated(A,1); 1822 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1823 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_ROW_ORIENTED: 1826 MatCheckPreallocated(A,1); 1827 a->roworiented = flg; 1828 1829 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1830 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1831 break; 1832 case MAT_NEW_DIAGONALS: 1833 case MAT_SORTED_FULL: 1834 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1835 break; 1836 case MAT_IGNORE_OFF_PROC_ENTRIES: 1837 a->donotstash = flg; 1838 break; 1839 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1840 case MAT_SPD: 1841 case MAT_SYMMETRIC: 1842 case MAT_STRUCTURALLY_SYMMETRIC: 1843 case MAT_HERMITIAN: 1844 case MAT_SYMMETRY_ETERNAL: 1845 break; 1846 case MAT_SUBMAT_SINGLEIS: 1847 A->submat_singleis = flg; 1848 break; 1849 case MAT_STRUCTURE_ONLY: 1850 /* The option is handled directly by MatSetOption() */ 1851 break; 1852 default: 1853 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 1858 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1859 { 1860 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1861 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1862 PetscErrorCode ierr; 1863 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1864 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1865 PetscInt *cmap,*idx_p; 1866 1867 PetscFunctionBegin; 1868 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1869 mat->getrowactive = PETSC_TRUE; 1870 1871 if (!mat->rowvalues && (idx || v)) { 1872 /* 1873 allocate enough space to hold information from the longest row. 
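       The required size is the maximum over the local rows of the combined number of
       nonzeros in the diagonal block A and the off-diagonal block B, computed by the loop
       below; the resulting rowvalues/rowindices buffers stay attached to the matrix and are
       reused by every subsequent MatGetRow() call on this process.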
1874 */ 1875 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1876 PetscInt max = 1,tmp; 1877 for (i=0; i<matin->rmap->n; i++) { 1878 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1879 if (max < tmp) max = tmp; 1880 } 1881 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1882 } 1883 1884 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1885 lrow = row - rstart; 1886 1887 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1888 if (!v) {pvA = NULL; pvB = NULL;} 1889 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1890 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1891 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1892 nztot = nzA + nzB; 1893 1894 cmap = mat->garray; 1895 if (v || idx) { 1896 if (nztot) { 1897 /* Sort by increasing column numbers, assuming A and B already sorted */ 1898 PetscInt imark = -1; 1899 if (v) { 1900 *v = v_p = mat->rowvalues; 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1903 else break; 1904 } 1905 imark = i; 1906 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1907 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1908 } 1909 if (idx) { 1910 *idx = idx_p = mat->rowindices; 1911 if (imark > -1) { 1912 for (i=0; i<imark; i++) { 1913 idx_p[i] = cmap[cworkB[i]]; 1914 } 1915 } else { 1916 for (i=0; i<nzB; i++) { 1917 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1918 else break; 1919 } 1920 imark = i; 1921 } 1922 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1923 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1924 } 1925 } else { 1926 if (idx) *idx = NULL; 1927 if (v) *v = NULL; 1928 } 1929 } 1930 *nz = nztot; 1931 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1932 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1937 { 1938 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1939 1940 PetscFunctionBegin; 1941 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1942 aij->getrowactive = PETSC_FALSE; 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1947 { 1948 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1949 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1950 PetscErrorCode ierr; 1951 PetscInt i,j,cstart = mat->cmap->rstart; 1952 PetscReal sum = 0.0; 1953 MatScalar *v; 1954 1955 PetscFunctionBegin; 1956 if (aij->size == 1) { 1957 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1958 } else { 1959 if (type == NORM_FROBENIUS) { 1960 v = amat->a; 1961 for (i=0; i<amat->nz; i++) { 1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1963 } 1964 v = bmat->a; 1965 for (i=0; i<bmat->nz; i++) { 1966 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1967 } 1968 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1969 *norm = PetscSqrtReal(*norm); 1970 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1971 } else if (type == NORM_1) { /* max column norm */ 1972 PetscReal *tmp,*tmp2; 1973 PetscInt *jj,*garray = aij->garray; 1974 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1975 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1976 *norm = 0.0; 1977 v = amat->a; jj = amat->j; 1978 for (j=0; j<amat->nz; j++) { 1979 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1980 } 1981 v = bmat->a; jj = bmat->j; 1982 for (j=0; j<bmat->nz; j++) { 1983 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1984 } 1985 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1986 for (j=0; j<mat->cmap->N; j++) { 1987 if (tmp2[j] > *norm) *norm = tmp2[j]; 1988 } 1989 ierr = PetscFree(tmp);CHKERRQ(ierr); 1990 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1991 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1992 } else if (type == NORM_INFINITY) { /* max row norm */ 1993 PetscReal ntemp = 0.0; 1994 for (j=0; j<aij->A->rmap->n; j++) { 1995 v = amat->a + amat->i[j]; 1996 sum = 0.0; 1997 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1998 sum += PetscAbsScalar(*v); v++; 1999 } 2000 v = bmat->a + bmat->i[j]; 2001 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2002 sum += PetscAbsScalar(*v); v++; 2003 } 2004 if (sum > ntemp) ntemp = sum; 2005 } 2006 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2007 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2008 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2014 { 2015 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2016 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2017 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2018 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2019 PetscErrorCode ierr; 2020 Mat B,A_diag,*B_diag; 2021 const MatScalar *array; 2022 2023 PetscFunctionBegin; 2024 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2025 ai = Aloc->i; aj = Aloc->j; 2026 bi = Bloc->i; bj = Bloc->j; 2027 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2028 PetscInt *d_nnz,*g_nnz,*o_nnz; 2029 PetscSFNode *oloc; 2030 PETSC_UNUSED PetscSF sf; 2031 2032 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2033 /* compute d_nnz for preallocation */ 2034 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2035 for (i=0; i<ai[ma]; i++) { 2036 d_nnz[aj[i]]++; 2037 } 2038 /* compute local off-diagonal contributions */ 2039 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2040 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2041 /* map those to global */ 2042 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2043 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2044 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2045 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2046 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2047 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2048 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2049 2050 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2051 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2052 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2053 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2054 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2055 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2056 } else { 2057 B = *matout; 2058 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2059 } 2060 2061 b = (Mat_MPIAIJ*)B->data; 2062 A_diag = a->A; 2063 B_diag = &b->A; 2064 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2065 A_diag_ncol = A_diag->cmap->N; 2066 B_diag_ilen = sub_B_diag->ilen; 2067 B_diag_i = sub_B_diag->i; 2068 2069 /* Set ilen for diagonal of B */ 2070 for (i=0; i<A_diag_ncol; i++) { 2071 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2072 } 2073 2074 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2075 very quickly (=without using MatSetValues), because all writes are local. */ 2076 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2077 2078 /* copy over the B part */ 2079 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2080 array = Bloc->a; 2081 row = A->rmap->rstart; 2082 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2083 cols_tmp = cols; 2084 for (i=0; i<mb; i++) { 2085 ncol = bi[i+1]-bi[i]; 2086 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2087 row++; 2088 array += ncol; cols_tmp += ncol; 2089 } 2090 ierr = PetscFree(cols);CHKERRQ(ierr); 2091 2092 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2093 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2094 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2095 *matout = B; 2096 } else { 2097 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2098 } 2099 PetscFunctionReturn(0); 2100 } 2101 2102 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2103 { 2104 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2105 Mat a = aij->A,b = aij->B; 2106 PetscErrorCode ierr; 2107 PetscInt s1,s2,s3; 2108 2109 PetscFunctionBegin; 2110 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2111 if (rr) { 2112 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2113 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2114 /* Overlap communication with computation. 
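       The forward scatter of the right-scaling vector rr into the ghosted vector lvec is
       started here; the left scaling of the off-diagonal block and the scaling of the local
       diagonal block run while that message is in flight, and only after VecScatterEnd()
       below is the off-diagonal block scaled on the right with the received lvec values.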
*/ 2115 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 } 2117 if (ll) { 2118 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2119 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2120 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2121 } 2122 /* scale the diagonal block */ 2123 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2124 2125 if (rr) { 2126 /* Do a scatter end and then right scale the off-diagonal block */ 2127 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2128 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2129 } 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2134 { 2135 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2144 { 2145 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2146 Mat a,b,c,d; 2147 PetscBool flg; 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 a = matA->A; b = matA->B; 2152 c = matB->A; d = matB->B; 2153 2154 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2155 if (flg) { 2156 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2157 } 2158 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2163 { 2164 PetscErrorCode ierr; 2165 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2166 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2167 2168 PetscFunctionBegin; 2169 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2170 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2171 /* because of the column compression in the off-processor part of the matrix a->B, 2172 the number of columns in a->B and b->B may be different, hence we cannot call 2173 the MatCopy() directly on the two parts. If need be, we can provide a more 2174 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2175 then copying the submatrices */ 2176 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2177 } else { 2178 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2179 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2180 } 2181 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2186 { 2187 PetscErrorCode ierr; 2188 2189 PetscFunctionBegin; 2190 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /* 2195 Computes the number of nonzeros per row needed for preallocation when X and Y 2196 have different nonzero structure. 
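    For each row i, nnz[i] is the size of the union of the column sets of row i of X and
    row i of Y, compared in global numbering through the xltog/yltog maps and assuming the
    usual sorted AIJ column ordering; the loop is a two-pointer merge that counts Y-only
    columns, skips columns present in both rows, and counts every X column.  Illustrative
    data only: a row with X columns {1,4,7} and Y columns {2,4,9} gives nnz[i] = 5.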
2197 */ 2198 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2199 { 2200 PetscInt i,j,k,nzx,nzy; 2201 2202 PetscFunctionBegin; 2203 /* Set the number of nonzeros in the new matrix */ 2204 for (i=0; i<m; i++) { 2205 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2206 nzx = xi[i+1] - xi[i]; 2207 nzy = yi[i+1] - yi[i]; 2208 nnz[i] = 0; 2209 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2210 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2211 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2212 nnz[i]++; 2213 } 2214 for (; k<nzy; k++) nnz[i]++; 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2220 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2221 { 2222 PetscErrorCode ierr; 2223 PetscInt m = Y->rmap->N; 2224 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2225 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2226 2227 PetscFunctionBegin; 2228 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2233 { 2234 PetscErrorCode ierr; 2235 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2236 PetscBLASInt bnz,one=1; 2237 Mat_SeqAIJ *x,*y; 2238 2239 PetscFunctionBegin; 2240 if (str == SAME_NONZERO_PATTERN) { 2241 PetscScalar alpha = a; 2242 x = (Mat_SeqAIJ*)xx->A->data; 2243 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2244 y = (Mat_SeqAIJ*)yy->A->data; 2245 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2246 x = (Mat_SeqAIJ*)xx->B->data; 2247 y = (Mat_SeqAIJ*)yy->B->data; 2248 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2249 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2250 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2251 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2252 will be updated */ 2253 #if defined(PETSC_HAVE_DEVICE) 2254 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2255 Y->offloadmask = PETSC_OFFLOAD_CPU; 2256 } 2257 #endif 2258 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2259 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2260 } else { 2261 Mat B; 2262 PetscInt *nnz_d,*nnz_o; 2263 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2264 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2265 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2266 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2267 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2268 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2269 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2270 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2271 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2272 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2273 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2274 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2275 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat);
2281
2282 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2283 {
2284 #if defined(PETSC_USE_COMPLEX)
2285 PetscErrorCode ierr;
2286 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2287
2288 PetscFunctionBegin;
2289 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2290 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2291 #else
2292 PetscFunctionBegin;
2293 #endif
2294 PetscFunctionReturn(0);
2295 }
2296
2297 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2298 {
2299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2300 PetscErrorCode ierr;
2301
2302 PetscFunctionBegin;
2303 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2304 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2305 PetscFunctionReturn(0);
2306 }
2307
2308 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2309 {
2310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2311 PetscErrorCode ierr;
2312
2313 PetscFunctionBegin;
2314 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2315 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2316 PetscFunctionReturn(0);
2317 }
2318
2319 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2320 {
2321 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2322 PetscErrorCode ierr;
2323 PetscInt i,*idxb = NULL,m = A->rmap->n;
2324 PetscScalar *va,*vv;
2325 Vec vB,vA;
2326 const PetscScalar *vb;
2327
2328 PetscFunctionBegin;
2329 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2330 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2331
2332 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2333 if (idx) {
2334 for (i=0; i<m; i++) {
2335 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2336 }
2337 }
2338
2339 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2340 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2341 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2342
2343 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2344 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2345 for (i=0; i<m; i++) {
2346 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2347 vv[i] = vb[i];
2348 if (idx) idx[i] = a->garray[idxb[i]];
2349 } else {
2350 vv[i] = va[i];
2351 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2352 idx[i] = a->garray[idxb[i]];
2353 }
2354 }
2355 ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2356 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2357 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2358 ierr = PetscFree(idxb);CHKERRQ(ierr);
2359 ierr = VecDestroy(&vA);CHKERRQ(ierr);
2360 ierr = VecDestroy(&vB);CHKERRQ(ierr);
2361 PetscFunctionReturn(0);
2362 }
2363
2364 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2365 {
2366 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2367 PetscInt m = A->rmap->n,n = A->cmap->n;
2368 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2369 PetscInt *cmap = mat->garray;
2370 PetscInt *diagIdx, *offdiagIdx;
2371 Vec diagV, offdiagV;
2372 PetscScalar *a, *diagA, *offdiagA, *ba;
2373 PetscInt r,j,col,ncols,*bi,*bj;
2374 PetscErrorCode ierr;
2375 Mat B = mat->B;
2376 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2377
2378 PetscFunctionBegin;
2379 /* When a process holds entire A and other processes have no entry */
2380 if (A->cmap->N == n) {
2381 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2382 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2383 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2384 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2385 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2386 PetscFunctionReturn(0);
2387 } else if (n == 0)
{ 2388 if (m) { 2389 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2390 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2391 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2392 } 2393 PetscFunctionReturn(0); 2394 } 2395 2396 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2397 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2398 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2399 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2400 2401 /* Get offdiagIdx[] for implicit 0.0 */ 2402 ba = b->a; 2403 bi = b->i; 2404 bj = b->j; 2405 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2406 for (r = 0; r < m; r++) { 2407 ncols = bi[r+1] - bi[r]; 2408 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2409 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2410 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2411 offdiagA[r] = 0.0; 2412 2413 /* Find first hole in the cmap */ 2414 for (j=0; j<ncols; j++) { 2415 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2416 if (col > j && j < cstart) { 2417 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2418 break; 2419 } else if (col > j + n && j >= cstart) { 2420 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2421 break; 2422 } 2423 } 2424 if (j == ncols && ncols < A->cmap->N - n) { 2425 /* a hole is outside compressed Bcols */ 2426 if (ncols == 0) { 2427 if (cstart) { 2428 offdiagIdx[r] = 0; 2429 } else offdiagIdx[r] = cend; 2430 } else { /* ncols > 0 */ 2431 offdiagIdx[r] = cmap[ncols-1] + 1; 2432 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2433 } 2434 } 2435 } 2436 2437 for (j=0; j<ncols; j++) { 2438 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2439 ba++; bj++; 2440 } 2441 } 2442 2443 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2444 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2445 for (r = 0; r < m; ++r) { 2446 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2447 a[r] = diagA[r]; 2448 if (idx) idx[r] = cstart + diagIdx[r]; 2449 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2450 a[r] = diagA[r]; 2451 if (idx) { 2452 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2453 idx[r] = cstart + diagIdx[r]; 2454 } else idx[r] = offdiagIdx[r]; 2455 } 2456 } else { 2457 a[r] = offdiagA[r]; 2458 if (idx) idx[r] = offdiagIdx[r]; 2459 } 2460 } 2461 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2462 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2463 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2464 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2465 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2466 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2471 { 2472 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2473 PetscInt m = A->rmap->n,n = A->cmap->n; 2474 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2475 PetscInt *cmap = mat->garray; 2476 PetscInt *diagIdx, *offdiagIdx; 2477 Vec diagV, offdiagV; 2478 PetscScalar *a, *diagA, *offdiagA, *ba; 2479 PetscInt r,j,col,ncols,*bi,*bj; 2480 PetscErrorCode ierr; 2481 Mat B = mat->B; 2482 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2483 2484 PetscFunctionBegin; 2485 /* When a process holds entire A and other processes have no entry */ 2486 if (A->cmap->N == n) { 2487 ierr = 
VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2488 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2489 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2490 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2491 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } else if (n == 0) { 2494 if (m) { 2495 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2496 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2497 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2498 } 2499 PetscFunctionReturn(0); 2500 } 2501 2502 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2503 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2504 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2505 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2506 2507 /* Get offdiagIdx[] for implicit 0.0 */ 2508 ba = b->a; 2509 bi = b->i; 2510 bj = b->j; 2511 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2512 for (r = 0; r < m; r++) { 2513 ncols = bi[r+1] - bi[r]; 2514 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2515 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2516 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2517 offdiagA[r] = 0.0; 2518 2519 /* Find first hole in the cmap */ 2520 for (j=0; j<ncols; j++) { 2521 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2522 if (col > j && j < cstart) { 2523 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2524 break; 2525 } else if (col > j + n && j >= cstart) { 2526 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2527 break; 2528 } 2529 } 2530 if (j == ncols && ncols < A->cmap->N - n) { 2531 /* a hole is outside compressed Bcols */ 2532 if (ncols == 0) { 2533 if (cstart) { 2534 offdiagIdx[r] = 0; 2535 } else offdiagIdx[r] = cend; 2536 } else { /* ncols > 0 */ 2537 offdiagIdx[r] = cmap[ncols-1] + 1; 2538 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2539 } 2540 } 2541 } 2542 2543 for (j=0; j<ncols; j++) { 2544 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2545 ba++; bj++; 2546 } 2547 } 2548 2549 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2550 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2551 for (r = 0; r < m; ++r) { 2552 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2553 a[r] = diagA[r]; 2554 if (idx) idx[r] = cstart + diagIdx[r]; 2555 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2556 a[r] = diagA[r]; 2557 if (idx) { 2558 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2559 idx[r] = cstart + diagIdx[r]; 2560 } else idx[r] = offdiagIdx[r]; 2561 } 2562 } else { 2563 a[r] = offdiagA[r]; 2564 if (idx) idx[r] = offdiagIdx[r]; 2565 } 2566 } 2567 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2568 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2569 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2570 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2571 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2572 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2577 { 2578 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2579 PetscInt m = A->rmap->n,n = A->cmap->n; 2580 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2581 PetscInt *cmap = mat->garray; 2582 PetscInt *diagIdx, *offdiagIdx; 2583 Vec diagV, 
offdiagV; 2584 PetscScalar *a, *diagA, *offdiagA, *ba; 2585 PetscInt r,j,col,ncols,*bi,*bj; 2586 PetscErrorCode ierr; 2587 Mat B = mat->B; 2588 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2589 2590 PetscFunctionBegin; 2591 /* When a process holds entire A and other processes have no entry */ 2592 if (A->cmap->N == n) { 2593 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2594 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2595 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2596 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2597 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2598 PetscFunctionReturn(0); 2599 } else if (n == 0) { 2600 if (m) { 2601 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2602 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2603 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2604 } 2605 PetscFunctionReturn(0); 2606 } 2607 2608 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2609 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2610 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2611 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2612 2613 /* Get offdiagIdx[] for implicit 0.0 */ 2614 ba = b->a; 2615 bi = b->i; 2616 bj = b->j; 2617 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2618 for (r = 0; r < m; r++) { 2619 ncols = bi[r+1] - bi[r]; 2620 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2621 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2622 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2623 offdiagA[r] = 0.0; 2624 2625 /* Find first hole in the cmap */ 2626 for (j=0; j<ncols; j++) { 2627 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2628 if (col > j && j < cstart) { 2629 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2630 break; 2631 } else if (col > j + n && j >= cstart) { 2632 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2633 break; 2634 } 2635 } 2636 if (j == ncols && ncols < A->cmap->N - n) { 2637 /* a hole is outside compressed Bcols */ 2638 if (ncols == 0) { 2639 if (cstart) { 2640 offdiagIdx[r] = 0; 2641 } else offdiagIdx[r] = cend; 2642 } else { /* ncols > 0 */ 2643 offdiagIdx[r] = cmap[ncols-1] + 1; 2644 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2645 } 2646 } 2647 } 2648 2649 for (j=0; j<ncols; j++) { 2650 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2651 ba++; bj++; 2652 } 2653 } 2654 2655 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2656 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2657 for (r = 0; r < m; ++r) { 2658 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2659 a[r] = diagA[r]; 2660 if (idx) idx[r] = cstart + diagIdx[r]; 2661 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2662 a[r] = diagA[r]; 2663 if (idx) { 2664 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2665 idx[r] = cstart + diagIdx[r]; 2666 } else idx[r] = offdiagIdx[r]; 2667 } 2668 } else { 2669 a[r] = offdiagA[r]; 2670 if (idx) idx[r] = offdiagIdx[r]; 2671 } 2672 } 2673 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2674 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2675 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2676 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2677 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2678 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2679 PetscFunctionReturn(0); 
2680 } 2681 2682 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2683 { 2684 PetscErrorCode ierr; 2685 Mat *dummy; 2686 2687 PetscFunctionBegin; 2688 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2689 *newmat = *dummy; 2690 ierr = PetscFree(dummy);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2695 { 2696 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2697 PetscErrorCode ierr; 2698 2699 PetscFunctionBegin; 2700 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2701 A->factorerrortype = a->A->factorerrortype; 2702 PetscFunctionReturn(0); 2703 } 2704 2705 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2706 { 2707 PetscErrorCode ierr; 2708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2709 2710 PetscFunctionBegin; 2711 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2712 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2713 if (x->assembled) { 2714 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2715 } else { 2716 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2717 } 2718 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2719 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2724 { 2725 PetscFunctionBegin; 2726 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2727 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2728 PetscFunctionReturn(0); 2729 } 2730 2731 /*@ 2732 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2733 2734 Collective on Mat 2735 2736 Input Parameters: 2737 + A - the matrix 2738 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2739 2740 Level: advanced 2741 2742 @*/ 2743 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2744 { 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2753 { 2754 PetscErrorCode ierr; 2755 PetscBool sc = PETSC_FALSE,flg; 2756 2757 PetscFunctionBegin; 2758 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2759 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2760 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2761 if (flg) { 2762 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2763 } 2764 ierr = PetscOptionsTail();CHKERRQ(ierr); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2769 { 2770 PetscErrorCode ierr; 2771 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2772 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2773 2774 PetscFunctionBegin; 2775 if (!Y->preallocated) { 2776 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2777 } else if (!aij->nz) { 2778 PetscInt nonew = 
aij->nonew; 2779 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2780 aij->nonew = nonew; 2781 } 2782 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2787 { 2788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2793 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2794 if (d) { 2795 PetscInt rstart; 2796 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2797 *d += rstart; 2798 2799 } 2800 PetscFunctionReturn(0); 2801 } 2802 2803 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2804 { 2805 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2806 PetscErrorCode ierr; 2807 2808 PetscFunctionBegin; 2809 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2810 PetscFunctionReturn(0); 2811 } 2812 2813 /* -------------------------------------------------------------------*/ 2814 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2815 MatGetRow_MPIAIJ, 2816 MatRestoreRow_MPIAIJ, 2817 MatMult_MPIAIJ, 2818 /* 4*/ MatMultAdd_MPIAIJ, 2819 MatMultTranspose_MPIAIJ, 2820 MatMultTransposeAdd_MPIAIJ, 2821 NULL, 2822 NULL, 2823 NULL, 2824 /*10*/ NULL, 2825 NULL, 2826 NULL, 2827 MatSOR_MPIAIJ, 2828 MatTranspose_MPIAIJ, 2829 /*15*/ MatGetInfo_MPIAIJ, 2830 MatEqual_MPIAIJ, 2831 MatGetDiagonal_MPIAIJ, 2832 MatDiagonalScale_MPIAIJ, 2833 MatNorm_MPIAIJ, 2834 /*20*/ MatAssemblyBegin_MPIAIJ, 2835 MatAssemblyEnd_MPIAIJ, 2836 MatSetOption_MPIAIJ, 2837 MatZeroEntries_MPIAIJ, 2838 /*24*/ MatZeroRows_MPIAIJ, 2839 NULL, 2840 NULL, 2841 NULL, 2842 NULL, 2843 /*29*/ MatSetUp_MPIAIJ, 2844 NULL, 2845 NULL, 2846 MatGetDiagonalBlock_MPIAIJ, 2847 NULL, 2848 /*34*/ MatDuplicate_MPIAIJ, 2849 NULL, 2850 NULL, 2851 NULL, 2852 NULL, 2853 /*39*/ MatAXPY_MPIAIJ, 2854 MatCreateSubMatrices_MPIAIJ, 2855 MatIncreaseOverlap_MPIAIJ, 2856 MatGetValues_MPIAIJ, 2857 MatCopy_MPIAIJ, 2858 /*44*/ MatGetRowMax_MPIAIJ, 2859 MatScale_MPIAIJ, 2860 MatShift_MPIAIJ, 2861 MatDiagonalSet_MPIAIJ, 2862 MatZeroRowsColumns_MPIAIJ, 2863 /*49*/ MatSetRandom_MPIAIJ, 2864 NULL, 2865 NULL, 2866 NULL, 2867 NULL, 2868 /*54*/ MatFDColoringCreate_MPIXAIJ, 2869 NULL, 2870 MatSetUnfactored_MPIAIJ, 2871 MatPermute_MPIAIJ, 2872 NULL, 2873 /*59*/ MatCreateSubMatrix_MPIAIJ, 2874 MatDestroy_MPIAIJ, 2875 MatView_MPIAIJ, 2876 NULL, 2877 NULL, 2878 /*64*/ NULL, 2879 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2880 NULL, 2881 NULL, 2882 NULL, 2883 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2884 MatGetRowMinAbs_MPIAIJ, 2885 NULL, 2886 NULL, 2887 NULL, 2888 NULL, 2889 /*75*/ MatFDColoringApply_AIJ, 2890 MatSetFromOptions_MPIAIJ, 2891 NULL, 2892 NULL, 2893 MatFindZeroDiagonals_MPIAIJ, 2894 /*80*/ NULL, 2895 NULL, 2896 NULL, 2897 /*83*/ MatLoad_MPIAIJ, 2898 MatIsSymmetric_MPIAIJ, 2899 NULL, 2900 NULL, 2901 NULL, 2902 NULL, 2903 /*89*/ NULL, 2904 NULL, 2905 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2906 NULL, 2907 NULL, 2908 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2909 NULL, 2910 NULL, 2911 NULL, 2912 MatBindToCPU_MPIAIJ, 2913 /*99*/ MatProductSetFromOptions_MPIAIJ, 2914 NULL, 2915 NULL, 2916 MatConjugate_MPIAIJ, 2917 NULL, 2918 /*104*/MatSetValuesRow_MPIAIJ, 2919 MatRealPart_MPIAIJ, 2920 MatImaginaryPart_MPIAIJ, 2921 NULL, 2922 NULL, 2923 /*109*/NULL, 2924 NULL, 2925 MatGetRowMin_MPIAIJ, 2926 NULL, 2927 
MatMissingDiagonal_MPIAIJ, 2928 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2929 NULL, 2930 MatGetGhosts_MPIAIJ, 2931 NULL, 2932 NULL, 2933 /*119*/NULL, 2934 NULL, 2935 NULL, 2936 NULL, 2937 MatGetMultiProcBlock_MPIAIJ, 2938 /*124*/MatFindNonzeroRows_MPIAIJ, 2939 MatGetColumnNorms_MPIAIJ, 2940 MatInvertBlockDiagonal_MPIAIJ, 2941 MatInvertVariableBlockDiagonal_MPIAIJ, 2942 MatCreateSubMatricesMPI_MPIAIJ, 2943 /*129*/NULL, 2944 NULL, 2945 NULL, 2946 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2947 NULL, 2948 /*134*/NULL, 2949 NULL, 2950 NULL, 2951 NULL, 2952 NULL, 2953 /*139*/MatSetBlockSizes_MPIAIJ, 2954 NULL, 2955 NULL, 2956 MatFDColoringSetUp_MPIXAIJ, 2957 MatFindOffBlockDiagonalEntries_MPIAIJ, 2958 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2959 /*145*/NULL, 2960 NULL, 2961 NULL 2962 }; 2963 2964 /* ----------------------------------------------------------------------------------------*/ 2965 2966 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2967 { 2968 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2969 PetscErrorCode ierr; 2970 2971 PetscFunctionBegin; 2972 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2973 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2974 PetscFunctionReturn(0); 2975 } 2976 2977 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2978 { 2979 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2984 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2985 PetscFunctionReturn(0); 2986 } 2987 2988 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2989 { 2990 Mat_MPIAIJ *b; 2991 PetscErrorCode ierr; 2992 PetscMPIInt size; 2993 2994 PetscFunctionBegin; 2995 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2996 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2997 b = (Mat_MPIAIJ*)B->data; 2998 2999 #if defined(PETSC_USE_CTABLE) 3000 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3001 #else 3002 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3003 #endif 3004 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3005 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3006 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3007 3008 /* Because the B will have been resized we simply destroy it and create a new one each time */ 3009 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3010 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3011 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3012 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 3013 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3014 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3015 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3016 3017 if (!B->preallocated) { 3018 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3019 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3020 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3021 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3022 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3023 } 3024 3025 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3026 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3027 B->preallocated = PETSC_TRUE; 3028 B->was_assembled = PETSC_FALSE; 3029 B->assembled = PETSC_FALSE; 3030 PetscFunctionReturn(0); 3031 } 3032 3033 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3034 { 3035 Mat_MPIAIJ *b; 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3040 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3041 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3042 b = (Mat_MPIAIJ*)B->data; 3043 3044 #if defined(PETSC_USE_CTABLE) 3045 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3046 #else 3047 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3048 #endif 3049 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3050 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3051 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3052 3053 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3054 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3055 B->preallocated = PETSC_TRUE; 3056 B->was_assembled = PETSC_FALSE; 3057 B->assembled = PETSC_FALSE; 3058 PetscFunctionReturn(0); 3059 } 3060 3061 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3062 { 3063 Mat mat; 3064 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3065 PetscErrorCode ierr; 3066 3067 PetscFunctionBegin; 3068 *newmat = NULL; 3069 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3070 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3071 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3072 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3073 a = (Mat_MPIAIJ*)mat->data; 3074 3075 mat->factortype = matin->factortype; 3076 mat->assembled = matin->assembled; 3077 mat->insertmode = NOT_SET_VALUES; 3078 mat->preallocated = matin->preallocated; 3079 3080 a->size = oldmat->size; 3081 a->rank = oldmat->rank; 3082 a->donotstash = oldmat->donotstash; 3083 a->roworiented = oldmat->roworiented; 3084 a->rowindices = NULL; 3085 a->rowvalues = NULL; 3086 a->getrowactive = PETSC_FALSE; 3087 3088 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3089 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3090 3091 if (oldmat->colmap) { 3092 #if defined(PETSC_USE_CTABLE) 3093 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3094 #else 3095 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3096 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3097 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3098 #endif 3099 } else a->colmap = NULL; 3100 if (oldmat->garray) { 3101 PetscInt len; 3102 len = oldmat->B->cmap->n; 3103 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3104 
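    /* duplicate the map from local column indices of the off-diagonal part B to global column indices of the original matrix */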
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3105 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3106 } else a->garray = NULL; 3107 3108 /* It may happen MatDuplicate is called with a non-assembled matrix 3109 In fact, MatDuplicate only requires the matrix to be preallocated 3110 This may happen inside a DMCreateMatrix_Shell */ 3111 if (oldmat->lvec) { 3112 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3113 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3114 } 3115 if (oldmat->Mvctx) { 3116 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3117 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3118 } 3119 if (oldmat->Mvctx_mpi1) { 3120 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 3121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 3122 } 3123 3124 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3125 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3126 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3127 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3128 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3129 *newmat = mat; 3130 PetscFunctionReturn(0); 3131 } 3132 3133 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3134 { 3135 PetscBool isbinary, ishdf5; 3136 PetscErrorCode ierr; 3137 3138 PetscFunctionBegin; 3139 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3140 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3141 /* force binary viewer to load .info file if it has not yet done so */ 3142 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3143 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3144 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3145 if (isbinary) { 3146 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3147 } else if (ishdf5) { 3148 #if defined(PETSC_HAVE_HDF5) 3149 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3150 #else 3151 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3152 #endif 3153 } else { 3154 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3155 } 3156 PetscFunctionReturn(0); 3157 } 3158 3159 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3160 { 3161 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3162 PetscInt *rowidxs,*colidxs; 3163 PetscScalar *matvals; 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3168 3169 /* read in matrix header */ 3170 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3171 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3172 M = header[1]; N = header[2]; nz = header[3]; 3173 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3174 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 3175 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3176 3177 /* set block sizes from the viewer's .info file */ 3178 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3179 /* set global sizes if not set already */ 3180 if (mat->rmap->N < 0) mat->rmap->N = M; 3181 if (mat->cmap->N < 0) mat->cmap->N = N; 3182 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3183 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3184 3185 /* check if the matrix sizes are correct */ 3186 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3187 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3188 3189 /* read in row lengths and build row indices */ 3190 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3191 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3192 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3193 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3194 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3195 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3196 /* read in column indices and matrix values */ 3197 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3198 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3199 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3200 /* store matrix indices and values */ 3201 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3202 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3203 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3204 PetscFunctionReturn(0); 3205 } 3206 3207 /* Not scalable because of ISAllGather() unless getting all columns. 
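   Each process gathers its own copy of the entire column index set, so memory use grows with the global size of iscol.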
*/ 3208 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3209 { 3210 PetscErrorCode ierr; 3211 IS iscol_local; 3212 PetscBool isstride; 3213 PetscMPIInt lisstride=0,gisstride; 3214 3215 PetscFunctionBegin; 3216 /* check if we are grabbing all columns*/ 3217 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3218 3219 if (isstride) { 3220 PetscInt start,len,mstart,mlen; 3221 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3222 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3223 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3224 if (mstart == start && mlen-mstart == len) lisstride = 1; 3225 } 3226 3227 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3228 if (gisstride) { 3229 PetscInt N; 3230 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3231 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3232 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3233 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3234 } else { 3235 PetscInt cbs; 3236 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3237 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3238 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3239 } 3240 3241 *isseq = iscol_local; 3242 PetscFunctionReturn(0); 3243 } 3244 3245 /* 3246 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3247 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3248 3249 Input Parameters: 3250 mat - matrix 3251 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3252 i.e., mat->rstart <= isrow[i] < mat->rend 3253 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3254 i.e., mat->cstart <= iscol[i] < mat->cend 3255 Output Parameter: 3256 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3257 iscol_o - sequential column index set for retrieving mat->B 3258 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3259 */ 3260 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3261 { 3262 PetscErrorCode ierr; 3263 Vec x,cmap; 3264 const PetscInt *is_idx; 3265 PetscScalar *xarray,*cmaparray; 3266 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3267 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3268 Mat B=a->B; 3269 Vec lvec=a->lvec,lcmap; 3270 PetscInt i,cstart,cend,Bn=B->cmap->N; 3271 MPI_Comm comm; 3272 VecScatter Mvctx=a->Mvctx; 3273 3274 PetscFunctionBegin; 3275 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3276 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3277 3278 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3279 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3280 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3281 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3282 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3283 3284 /* Get start indices */ 3285 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3286 isstart -= ncols; 3287 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3288 3289 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3290 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3291 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3292 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3293 for (i=0; i<ncols; i++) { 3294 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3295 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3296 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3297 } 3298 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3299 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3300 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3301 3302 /* Get iscol_d */ 3303 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3304 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3305 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3306 3307 /* Get isrow_d */ 3308 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3309 rstart = mat->rmap->rstart; 3310 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3311 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3312 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3313 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3314 3315 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3316 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3317 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3318 3319 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3320 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3321 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3322 3323 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3324 3325 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3326 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3327 3328 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3329 /* off-process column indices */ 3330 count = 0; 3331 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3332 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3333 3334 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3335 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3336 for (i=0; i<Bn; i++) { 3337 if (PetscRealPart(xarray[i]) > -1.0) { 3338 idx[count] = i; /* local column index in off-diagonal part B */ 3339 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3340 count++; 3341 } 3342 } 3343 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3344 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3345 3346 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3347 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3348 3349 ierr = PetscFree(idx);CHKERRQ(ierr); 3350 *garray = cmap1; 3351 3352 ierr = VecDestroy(&x);CHKERRQ(ierr); 3353 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3354 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3355 PetscFunctionReturn(0); 3356 } 3357 3358 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3359 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3360 { 3361 PetscErrorCode ierr; 3362 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3363 Mat M = NULL; 3364 MPI_Comm comm; 3365 IS iscol_d,isrow_d,iscol_o; 3366 Mat Asub = NULL,Bsub = NULL; 3367 PetscInt n; 3368 3369 PetscFunctionBegin; 3370 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3371 3372 if (call == MAT_REUSE_MATRIX) { 3373 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3374 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3375 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3376 3377 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3378 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3379 3380 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3381 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3382 3383 /* Update diagonal and off-diagonal portions of submat */ 3384 asub = (Mat_MPIAIJ*)(*submat)->data; 3385 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3386 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3387 if (n) { 3388 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3389 } 3390 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3391 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3392 3393 } else { /* call == MAT_INITIAL_MATRIX) */ 3394 const PetscInt *garray; 3395 PetscInt BsubN; 3396 3397 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3398 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3399 3400 /* Create local submatrices Asub and Bsub */ 3401 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3402 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3403 3404 /* Create submatrix M */ 3405 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3406 3407 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3408 asub = (Mat_MPIAIJ*)M->data; 3409 3410 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3411 n = asub->B->cmap->N; 3412 if (BsubN > n) { 3413 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3414 const PetscInt *idx; 3415 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3416 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3417 3418 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3419 j = 0; 3420 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3421 for (i=0; i<n; i++) { 3422 if (j >= BsubN) break; 3423 while (subgarray[i] > garray[j]) j++; 3424 3425 if (subgarray[i] == garray[j]) { 3426 idx_new[i] = idx[j++]; 3427 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3428 } 3429 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3430 3431 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3432 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3433 3434 } else if (BsubN < n) { 3435 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3436 } 3437 3438 ierr = PetscFree(garray);CHKERRQ(ierr); 3439 *submat = M; 3440 3441 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3442 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3443 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3444 3445 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3446 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3447 3448 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3449 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3450 } 3451 PetscFunctionReturn(0); 3452 } 3453 3454 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3455 { 3456 PetscErrorCode ierr; 3457 IS iscol_local=NULL,isrow_d; 3458 PetscInt csize; 3459 PetscInt n,i,j,start,end; 3460 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3461 MPI_Comm comm; 3462 3463 PetscFunctionBegin; 3464 /* If isrow has same processor distribution as mat, 3465 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3466 if (call == MAT_REUSE_MATRIX) { 3467 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3468 if (isrow_d) { 3469 sameRowDist = PETSC_TRUE; 3470 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3471 } else { 3472 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3473 if (iscol_local) { 3474 sameRowDist = PETSC_TRUE; 3475 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3476 } 3477 } 3478 } else { 3479 /* Check if isrow has same processor distribution as mat */ 3480 sameDist[0] = 
PETSC_FALSE; 3481 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3482 if (!n) { 3483 sameDist[0] = PETSC_TRUE; 3484 } else { 3485 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3486 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3487 if (i >= start && j < end) { 3488 sameDist[0] = PETSC_TRUE; 3489 } 3490 } 3491 3492 /* Check if iscol has same processor distribution as mat */ 3493 sameDist[1] = PETSC_FALSE; 3494 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3495 if (!n) { 3496 sameDist[1] = PETSC_TRUE; 3497 } else { 3498 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3499 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3500 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3501 } 3502 3503 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3504 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3505 sameRowDist = tsameDist[0]; 3506 } 3507 3508 if (sameRowDist) { 3509 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3510 /* isrow and iscol have same processor distribution as mat */ 3511 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3512 PetscFunctionReturn(0); 3513 } else { /* sameRowDist */ 3514 /* isrow has same processor distribution as mat */ 3515 if (call == MAT_INITIAL_MATRIX) { 3516 PetscBool sorted; 3517 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3518 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3519 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3520 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3521 3522 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3523 if (sorted) { 3524 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3525 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3526 PetscFunctionReturn(0); 3527 } 3528 } else { /* call == MAT_REUSE_MATRIX */ 3529 IS iscol_sub; 3530 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3531 if (iscol_sub) { 3532 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3533 PetscFunctionReturn(0); 3534 } 3535 } 3536 } 3537 } 3538 3539 /* General case: iscol -> iscol_local which has global size of iscol */ 3540 if (call == MAT_REUSE_MATRIX) { 3541 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3542 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3543 } else { 3544 if (!iscol_local) { 3545 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3546 } 3547 } 3548 3549 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3550 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3551 3552 if (call == MAT_INITIAL_MATRIX) { 3553 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3554 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3555 } 3556 PetscFunctionReturn(0); 3557 } 3558 3559 /*@C 3560 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3561 and "off-diagonal" part of the matrix in CSR format. 3562 3563 Collective 3564 3565 Input Parameters: 3566 + comm - MPI communicator 3567 . 
A - "diagonal" portion of matrix 3568 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3569 - garray - global index of B columns 3570 3571 Output Parameter: 3572 . mat - the matrix, with input A as its local diagonal matrix 3573 Level: advanced 3574 3575 Notes: 3576 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3577 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3578 3579 .seealso: MatCreateMPIAIJWithSplitArrays() 3580 @*/ 3581 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3582 { 3583 PetscErrorCode ierr; 3584 Mat_MPIAIJ *maij; 3585 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3586 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3587 PetscScalar *oa=b->a; 3588 Mat Bnew; 3589 PetscInt m,n,N; 3590 3591 PetscFunctionBegin; 3592 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3593 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3594 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3595 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3596 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3597 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3598 3599 /* Get global columns of mat */ 3600 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3601 3602 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3603 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3604 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3605 maij = (Mat_MPIAIJ*)(*mat)->data; 3606 3607 (*mat)->preallocated = PETSC_TRUE; 3608 3609 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3610 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3611 3612 /* Set A as diagonal portion of *mat */ 3613 maij->A = A; 3614 3615 nz = oi[m]; 3616 for (i=0; i<nz; i++) { 3617 col = oj[i]; 3618 oj[i] = garray[col]; 3619 } 3620 3621 /* Set Bnew as off-diagonal portion of *mat */ 3622 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3623 bnew = (Mat_SeqAIJ*)Bnew->data; 3624 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3625 maij->B = Bnew; 3626 3627 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3628 3629 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3630 b->free_a = PETSC_FALSE; 3631 b->free_ij = PETSC_FALSE; 3632 ierr = MatDestroy(&B);CHKERRQ(ierr); 3633 3634 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3635 bnew->free_a = PETSC_TRUE; 3636 bnew->free_ij = PETSC_TRUE; 3637 3638 /* condense columns of maij->B */ 3639 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3640 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3641 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3642 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3643 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3644 PetscFunctionReturn(0); 3645 } 3646 3647 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3648 
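/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts a parallel submatrix when isrow has the same row
   distribution as mat. For MAT_INITIAL_MATRIX, iscol_local must be the sorted sequential index set
   containing all requested columns (see ISGetSeqIS_Private()); for MAT_REUSE_MATRIX it may be NULL,
   since the column index set, column map and local submatrix are retrieved from the objects
   "SubIScol", "Subcmap" and "SubMatrix" composed with *newmat on the first call.
*/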
3649 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3650 { 3651 PetscErrorCode ierr; 3652 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3653 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3654 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3655 Mat M,Msub,B=a->B; 3656 MatScalar *aa; 3657 Mat_SeqAIJ *aij; 3658 PetscInt *garray = a->garray,*colsub,Ncols; 3659 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3660 IS iscol_sub,iscmap; 3661 const PetscInt *is_idx,*cmap; 3662 PetscBool allcolumns=PETSC_FALSE; 3663 MPI_Comm comm; 3664 3665 PetscFunctionBegin; 3666 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3667 3668 if (call == MAT_REUSE_MATRIX) { 3669 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3670 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3671 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3672 3673 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3674 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3675 3676 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3677 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3678 3679 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3680 3681 } else { /* call == MAT_INITIAL_MATRIX) */ 3682 PetscBool flg; 3683 3684 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3685 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3686 3687 /* (1) iscol -> nonscalable iscol_local */ 3688 /* Check for special case: each processor gets entire matrix columns */ 3689 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3690 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3691 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3692 if (allcolumns) { 3693 iscol_sub = iscol_local; 3694 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3695 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3696 3697 } else { 3698 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3699 PetscInt *idx,*cmap1,k; 3700 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3701 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3702 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3703 count = 0; 3704 k = 0; 3705 for (i=0; i<Ncols; i++) { 3706 j = is_idx[i]; 3707 if (j >= cstart && j < cend) { 3708 /* diagonal part of mat */ 3709 idx[count] = j; 3710 cmap1[count++] = i; /* column index in submat */ 3711 } else if (Bn) { 3712 /* off-diagonal part of mat */ 3713 if (j == garray[k]) { 3714 idx[count] = j; 3715 cmap1[count++] = i; /* column index in submat */ 3716 } else if (j > garray[k]) { 3717 while (j > garray[k] && k < Bn-1) k++; 3718 if (j == garray[k]) { 3719 idx[count] = j; 3720 cmap1[count++] = i; /* column index in submat */ 3721 } 3722 } 3723 } 3724 } 3725 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3726 3727 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3728 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3729 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3730 3731 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3732 } 3733 3734 /* (3) Create sequential Msub */ 3735 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3736 } 3737 3738 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3739 aij = (Mat_SeqAIJ*)(Msub)->data; 3740 ii = aij->i; 3741 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3742 3743 /* 3744 m - number of local rows 3745 Ncols - number of columns (same on all processors) 3746 rstart - first row in new global matrix generated 3747 */ 3748 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3749 3750 if (call == MAT_INITIAL_MATRIX) { 3751 /* (4) Create parallel newmat */ 3752 PetscMPIInt rank,size; 3753 PetscInt csize; 3754 3755 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3756 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3757 3758 /* 3759 Determine the number of non-zeros in the diagonal and off-diagonal 3760 portions of the matrix in order to do correct preallocation 3761 */ 3762 3763 /* first get start and end of "diagonal" columns */ 3764 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3765 if (csize == PETSC_DECIDE) { 3766 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3767 if (mglobal == Ncols) { /* square matrix */ 3768 nlocal = m; 3769 } else { 3770 nlocal = Ncols/size + ((Ncols % size) > rank); 3771 } 3772 } else { 3773 nlocal = csize; 3774 } 3775 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3776 rstart = rend - nlocal; 3777 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3778 3779 /* next, compute all the lengths */ 3780 jj = aij->j; 3781 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3782 olens = dlens + m; 3783 for (i=0; i<m; i++) { 3784 jend = ii[i+1] - ii[i]; 3785 olen = 0; 3786 dlen = 0; 3787 for (j=0; j<jend; j++) { 3788 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3789 else dlen++; 3790 jj++; 3791 } 3792 olens[i] = olen; 3793 dlens[i] = dlen; 3794 } 3795 3796 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3797 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3798 3799 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3800 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3801 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3802 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3803 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3804 ierr = PetscFree(dlens);CHKERRQ(ierr); 3805 3806 } else { /* call == MAT_REUSE_MATRIX */ 3807 M = *newmat; 3808 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3809 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3810 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3811 /* 3812 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3813 rather than the slower MatSetValues(). 3814 */ 3815 M->was_assembled = PETSC_TRUE; 3816 M->assembled = PETSC_FALSE; 3817 } 3818 3819 /* (5) Set values of Msub to *newmat */ 3820 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3821 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3822 3823 jj = aij->j; 3824 aa = aij->a; 3825 for (i=0; i<m; i++) { 3826 row = rstart + i; 3827 nz = ii[i+1] - ii[i]; 3828 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3829 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3830 jj += nz; aa += nz; 3831 } 3832 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3833 3834 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3835 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 3837 ierr = PetscFree(colsub);CHKERRQ(ierr); 3838 3839 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3840 if (call == MAT_INITIAL_MATRIX) { 3841 *newmat = M; 3842 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3843 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3844 3845 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3846 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3847 3848 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3849 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3850 3851 if (iscol_local) { 3852 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3853 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3854 } 3855 } 3856 PetscFunctionReturn(0); 3857 } 3858 3859 /* 3860 Not great since it makes two copies of the submatrix, first an SeqAIJ 3861 in local and then by concatenating the local matrices the end result. 3862 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3863 3864 Note: This requires a sequential iscol with all indices. 
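   In the call from MatCreateSubMatrix_MPIAIJ() this is the allgathered iscol_local produced by
   ISGetSeqIS_Private(), whose local size equals the global number of requested columns.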
3865 */ 3866 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3867 { 3868 PetscErrorCode ierr; 3869 PetscMPIInt rank,size; 3870 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3871 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3872 Mat M,Mreuse; 3873 MatScalar *aa,*vwork; 3874 MPI_Comm comm; 3875 Mat_SeqAIJ *aij; 3876 PetscBool colflag,allcolumns=PETSC_FALSE; 3877 3878 PetscFunctionBegin; 3879 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3880 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3881 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3882 3883 /* Check for special case: each processor gets entire matrix columns */ 3884 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3885 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3886 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3887 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3888 3889 if (call == MAT_REUSE_MATRIX) { 3890 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3891 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3892 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3893 } else { 3894 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3895 } 3896 3897 /* 3898 m - number of local rows 3899 n - number of columns (same on all processors) 3900 rstart - first row in new global matrix generated 3901 */ 3902 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3903 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3904 if (call == MAT_INITIAL_MATRIX) { 3905 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3906 ii = aij->i; 3907 jj = aij->j; 3908 3909 /* 3910 Determine the number of non-zeros in the diagonal and off-diagonal 3911 portions of the matrix in order to do correct preallocation 3912 */ 3913 3914 /* first get start and end of "diagonal" columns */ 3915 if (csize == PETSC_DECIDE) { 3916 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3917 if (mglobal == n) { /* square matrix */ 3918 nlocal = m; 3919 } else { 3920 nlocal = n/size + ((n % size) > rank); 3921 } 3922 } else { 3923 nlocal = csize; 3924 } 3925 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3926 rstart = rend - nlocal; 3927 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3928 3929 /* next, compute all the lengths */ 3930 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3931 olens = dlens + m; 3932 for (i=0; i<m; i++) { 3933 jend = ii[i+1] - ii[i]; 3934 olen = 0; 3935 dlen = 0; 3936 for (j=0; j<jend; j++) { 3937 if (*jj < rstart || *jj >= rend) olen++; 3938 else dlen++; 3939 jj++; 3940 } 3941 olens[i] = olen; 3942 dlens[i] = dlen; 3943 } 3944 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3945 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3946 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3947 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3948 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3949 ierr = PetscFree(dlens);CHKERRQ(ierr); 3950 } else { 3951 PetscInt ml,nl; 3952 3953 M = *newmat; 3954 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3955 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3956 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3957 /* 3958 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3959 rather than the slower MatSetValues(). 3960 */ 3961 M->was_assembled = PETSC_TRUE; 3962 M->assembled = PETSC_FALSE; 3963 } 3964 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3965 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3966 ii = aij->i; 3967 jj = aij->j; 3968 aa = aij->a; 3969 for (i=0; i<m; i++) { 3970 row = rstart + i; 3971 nz = ii[i+1] - ii[i]; 3972 cwork = jj; jj += nz; 3973 vwork = aa; aa += nz; 3974 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3975 } 3976 3977 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3978 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979 *newmat = M; 3980 3981 /* save submatrix used in processor for next request */ 3982 if (call == MAT_INITIAL_MATRIX) { 3983 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3984 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3985 } 3986 PetscFunctionReturn(0); 3987 } 3988 3989 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3990 { 3991 PetscInt m,cstart, cend,j,nnz,i,d; 3992 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3993 const PetscInt *JJ; 3994 PetscErrorCode ierr; 3995 PetscBool nooffprocentries; 3996 3997 PetscFunctionBegin; 3998 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3999 4000 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 4001 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 4002 m = B->rmap->n; 4003 cstart = B->cmap->rstart; 4004 cend = B->cmap->rend; 4005 rstart = B->rmap->rstart; 4006 4007 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 4008 4009 if (PetscDefined(USE_DEBUG)) { 4010 for (i=0; i<m; i++) { 4011 nnz = Ii[i+1]- Ii[i]; 4012 JJ = J + Ii[i]; 4013 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4014 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 4015 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4016 } 4017 } 4018 4019 for (i=0; i<m; i++) { 4020 nnz = Ii[i+1]- Ii[i]; 4021 JJ = J + Ii[i]; 4022 nnz_max = PetscMax(nnz_max,nnz); 4023 d = 0; 4024 for (j=0; j<nnz; j++) { 4025 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4026 } 4027 d_nnz[i] = d; 4028 o_nnz[i] = nnz - d; 4029 } 4030 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4031 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4032 4033 for (i=0; i<m; i++) { 4034 ii = i + rstart; 4035 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4036 } 4037 nooffprocentries = B->nooffprocentries; 4038 B->nooffprocentries = PETSC_TRUE; 4039 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4040 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4041 B->nooffprocentries = nooffprocentries; 4042 4043 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4044 PetscFunctionReturn(0); 4045 } 4046 4047 /*@ 4048 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4049 (the default parallel PETSc format). 4050 4051 Collective 4052 4053 Input Parameters: 4054 + B - the matrix 4055 . i - the indices into j for the start of each local row (starts with zero) 4056 . j - the column indices for each local row (starts with zero) 4057 - v - optional values in the matrix 4058 4059 Level: developer 4060 4061 Notes: 4062 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4063 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4064 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4065 4066 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4067 4068 The format which is used for the sparse matrix input, is equivalent to a 4069 row-major ordering.. i.e for the following matrix, the input data expected is 4070 as shown 4071 4072 $ 1 0 0 4073 $ 2 0 3 P0 4074 $ ------- 4075 $ 4 5 6 P1 4076 $ 4077 $ Process0 [P0]: rows_owned=[0,1] 4078 $ i = {0,1,3} [size = nrow+1 = 2+1] 4079 $ j = {0,0,2} [size = 3] 4080 $ v = {1,2,3} [size = 3] 4081 $ 4082 $ Process1 [P1]: rows_owned=[2] 4083 $ i = {0,3} [size = nrow+1 = 1+1] 4084 $ j = {0,1,2} [size = 3] 4085 $ v = {4,5,6} [size = 3] 4086 4087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4088 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4089 @*/ 4090 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4091 { 4092 PetscErrorCode ierr; 4093 4094 PetscFunctionBegin; 4095 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4096 PetscFunctionReturn(0); 4097 } 4098 4099 /*@C 4100 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4101 (the default parallel PETSc format). For good matrix assembly performance 4102 the user should preallocate the matrix storage by setting the parameters 4103 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4104 performance can be increased by more than a factor of 50. 4105 4106 Collective 4107 4108 Input Parameters: 4109 + B - the matrix 4110 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4111 (same value is used for all local rows) 4112 . d_nnz - array containing the number of nonzeros in the various rows of the 4113 DIAGONAL portion of the local submatrix (possibly different for each row) 4114 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4115 The size of this array is equal to the number of local rows, i.e 'm'. 4116 For matrices that will be factored, you must leave room for (and set) 4117 the diagonal entry even if it is zero. 4118 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4119 submatrix (same value is used for all local rows). 4120 - o_nnz - array containing the number of nonzeros in the various rows of the 4121 OFF-DIAGONAL portion of the local submatrix (possibly different for 4122 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4123 structure. The size of this array is equal to the number 4124 of local rows, i.e 'm'. 4125 4126 If the *_nnz parameter is given then the *_nz parameter is ignored 4127 4128 The AIJ format (also called the Yale sparse matrix format or 4129 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4130 storage. The stored row and column indices begin with zero. 4131 See Users-Manual: ch_mat for details. 4132 4133 The parallel matrix is partitioned such that the first m0 rows belong to 4134 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4135 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4136 4137 The DIAGONAL portion of the local submatrix of a processor can be defined 4138 as the submatrix which is obtained by extraction the part corresponding to 4139 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4140 first row that belongs to the processor, r2 is the last row belonging to 4141 the this processor, and c1-c2 is range of indices of the local part of a 4142 vector suitable for applying the matrix to. This is an mxn matrix. In the 4143 common case of a square matrix, the row and column ranges are the same and 4144 the DIAGONAL part is also square. The remaining portion of the local 4145 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4146 4147 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4148 4149 You can call MatGetInfo() to get information on how effective the preallocation was; 4150 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4151 You can also run with the option -info and look for messages with the string 4152 malloc in them to see if additional memory allocation was needed. 4153 4154 Example usage: 4155 4156 Consider the following 8x8 matrix with 34 non-zero values, that is 4157 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4158 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4159 as follows: 4160 4161 .vb 4162 1 2 0 | 0 3 0 | 0 4 4163 Proc0 0 5 6 | 7 0 0 | 8 0 4164 9 0 10 | 11 0 0 | 12 0 4165 ------------------------------------- 4166 13 0 14 | 15 16 17 | 0 0 4167 Proc1 0 18 0 | 19 20 21 | 0 0 4168 0 0 0 | 22 23 0 | 24 0 4169 ------------------------------------- 4170 Proc2 25 26 27 | 0 0 28 | 29 0 4171 30 0 0 | 31 32 33 | 0 34 4172 .ve 4173 4174 This can be represented as a collection of submatrices as: 4175 4176 .vb 4177 A B C 4178 D E F 4179 G H I 4180 .ve 4181 4182 Where the submatrices A,B,C are owned by proc0, D,E,F are 4183 owned by proc1, G,H,I are owned by proc2. 4184 4185 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4186 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4187 The 'M','N' parameters are 8,8, and have the same values on all procs. 4188 4189 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4190 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4191 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4192 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4193 part as SeqAIJ matrices. 
For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
   CSR format, the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering,
i.e for the following matrix, the input data expected is 4271 as shown 4272 4273 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4274 4275 $ 1 0 0 4276 $ 2 0 3 P0 4277 $ ------- 4278 $ 4 5 6 P1 4279 $ 4280 $ Process0 [P0]: rows_owned=[0,1] 4281 $ i = {0,1,3} [size = nrow+1 = 2+1] 4282 $ j = {0,0,2} [size = 3] 4283 $ v = {1,2,3} [size = 3] 4284 $ 4285 $ Process1 [P1]: rows_owned=[2] 4286 $ i = {0,3} [size = nrow+1 = 1+1] 4287 $ j = {0,1,2} [size = 3] 4288 $ v = {4,5,6} [size = 3] 4289 4290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4291 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4292 @*/ 4293 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4294 { 4295 PetscErrorCode ierr; 4296 4297 PetscFunctionBegin; 4298 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4299 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4300 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4301 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4302 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4303 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4304 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4305 PetscFunctionReturn(0); 4306 } 4307 4308 /*@ 4309 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4310 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4311 4312 Collective 4313 4314 Input Parameters: 4315 + mat - the matrix 4316 . m - number of local rows (Cannot be PETSC_DECIDE) 4317 . n - This value should be the same as the local size used in creating the 4318 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4319 calculated if N is given) For square matrices n is almost always m. 4320 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4321 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4322 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4323 . 
J - column indices 4324 - v - matrix values 4325 4326 Level: intermediate 4327 4328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4329 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4330 @*/ 4331 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4332 { 4333 PetscErrorCode ierr; 4334 PetscInt cstart,nnz,i,j; 4335 PetscInt *ld; 4336 PetscBool nooffprocentries; 4337 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4338 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4339 PetscScalar *ad = Ad->a, *ao = Ao->a; 4340 const PetscInt *Adi = Ad->i; 4341 PetscInt ldi,Iii,md; 4342 4343 PetscFunctionBegin; 4344 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4345 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4346 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4347 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4348 4349 cstart = mat->cmap->rstart; 4350 if (!Aij->ld) { 4351 /* count number of entries below block diagonal */ 4352 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4353 Aij->ld = ld; 4354 for (i=0; i<m; i++) { 4355 nnz = Ii[i+1]- Ii[i]; 4356 j = 0; 4357 while (J[j] < cstart && j < nnz) {j++;} 4358 J += nnz; 4359 ld[i] = j; 4360 } 4361 } else { 4362 ld = Aij->ld; 4363 } 4364 4365 for (i=0; i<m; i++) { 4366 nnz = Ii[i+1]- Ii[i]; 4367 Iii = Ii[i]; 4368 ldi = ld[i]; 4369 md = Adi[i+1]-Adi[i]; 4370 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4371 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4372 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4373 ad += md; 4374 ao += nnz - md; 4375 } 4376 nooffprocentries = mat->nooffprocentries; 4377 mat->nooffprocentries = PETSC_TRUE; 4378 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4379 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4380 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4381 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 mat->nooffprocentries = nooffprocentries; 4384 PetscFunctionReturn(0); 4385 } 4386 4387 /*@C 4388 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4389 (the default parallel PETSc format). For good matrix assembly performance 4390 the user should preallocate the matrix storage by setting the parameters 4391 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4392 performance can be increased by more than a factor of 50. 4393 4394 Collective 4395 4396 Input Parameters: 4397 + comm - MPI communicator 4398 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4399 This value should be the same as the local size used in creating the 4400 y vector for the matrix-vector product y = Ax. 4401 . n - This value should be the same as the local size used in creating the 4402 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4403 calculated if N is given) For square matrices n is almost always m. 4404 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4405 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4406 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4407 (same value is used for all local rows) 4408 . d_nnz - array containing the number of nonzeros in the various rows of the 4409 DIAGONAL portion of the local submatrix (possibly different for each row) 4410 or NULL, if d_nz is used to specify the nonzero structure. 4411 The size of this array is equal to the number of local rows, i.e. 'm'. 4412 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4413 submatrix (same value is used for all local rows). 4414 - o_nnz - array containing the number of nonzeros in the various rows of the 4415 OFF-DIAGONAL portion of the local submatrix (possibly different for 4416 each row) or NULL, if o_nz is used to specify the nonzero 4417 structure. The size of this array is equal to the number 4418 of local rows, i.e. 'm'. 4419 4420 Output Parameter: 4421 . A - the matrix 4422 4423 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4424 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4425 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4426 4427 Notes: 4428 If the *_nnz parameter is given then the *_nz parameter is ignored. 4429 4430 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4431 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4432 storage requirements for this matrix. 4433 4434 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4435 processor then it must be used on all processors that share the object for 4436 that argument. 4437 4438 The user MUST specify either the local or global matrix dimensions 4439 (possibly both). 4440 4441 The parallel matrix is partitioned across processors such that the 4442 first m0 rows belong to process 0, the next m1 rows belong to 4443 process 1, the next m2 rows belong to process 2, etc., where 4444 m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores 4445 values corresponding to an [m x N] submatrix. 4446 4447 The columns are logically partitioned with the n0 columns belonging 4448 to the 0th partition, the next n1 columns belonging to the next 4449 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4450 4451 The DIAGONAL portion of the local submatrix on any given processor 4452 is the submatrix corresponding to the rows and columns m,n 4453 corresponding to the given processor, i.e. the diagonal matrix on 4454 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4455 etc. The remaining portion of the local submatrix [m x (N-n)] 4456 constitutes the OFF-DIAGONAL portion. The example below better 4457 illustrates this concept. 4458 4459 For a square global matrix we define each processor's diagonal portion 4460 to be its local rows and the corresponding columns (a square submatrix); 4461 each processor's off-diagonal portion encompasses the remainder of the 4462 local matrix (a rectangular submatrix). 4463 4464 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4465 4466 When calling this routine with a single process communicator, a matrix of 4467 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4468 type of communicator, use the construction mechanism 4469 .vb 4470 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4471 .ve 4472 4478 By default, this format uses inodes (identical nodes) when possible. 4479 We search for consecutive rows with the same nonzero structure, thereby 4480 reusing matrix information to achieve increased efficiency. 4481 4482 Options Database Keys: 4483 + -mat_no_inode - Do not use inodes 4484 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4485 4486 4487 4488 Example usage: 4489 4490 Consider the following 8x8 matrix with 34 non-zero values, that is 4491 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4492 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4493 as follows 4494 4495 .vb 4496 1 2 0 | 0 3 0 | 0 4 4497 Proc0 0 5 6 | 7 0 0 | 8 0 4498 9 0 10 | 11 0 0 | 12 0 4499 ------------------------------------- 4500 13 0 14 | 15 16 17 | 0 0 4501 Proc1 0 18 0 | 19 20 21 | 0 0 4502 0 0 0 | 22 23 0 | 24 0 4503 ------------------------------------- 4504 Proc2 25 26 27 | 0 0 28 | 29 0 4505 30 0 0 | 31 32 33 | 0 34 4506 .ve 4507 4508 This can be represented as a collection of submatrices as 4509 4510 .vb 4511 A B C 4512 D E F 4513 G H I 4514 .ve 4515 4516 Where the submatrices A,B,C are owned by proc0, D,E,F are 4517 owned by proc1, G,H,I are owned by proc2. 4518 4519 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4520 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4521 The 'M','N' parameters are 8,8, and have the same values on all procs. 4522 4523 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4524 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4525 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4526 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4527 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4528 matrix, and [DF] as another SeqAIJ matrix. 4529 4530 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4531 allocated for every row of the local diagonal submatrix, and o_nz 4532 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4533 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4534 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4535 In this case, the values of d_nz,o_nz are 4536 .vb 4537 proc0 : d_nz = 2, o_nz = 2 4538 proc1 : d_nz = 3, o_nz = 2 4539 proc2 : d_nz = 1, o_nz = 4 4540 .ve 4541 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4542 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4543 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4544 34 values. 4545 4546 When the d_nnz, o_nnz parameters are specified, the storage is specified 4547 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4548 In the above case the values for d_nnz,o_nnz are 4549 .vb 4550 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4551 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4552 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4553 .ve 4554 Here the space allocated is the sum of all of the above values, i.e. 34, and 4555 hence the pre-allocation is perfect.
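
   As an illustrative sketch only (the variable names d_nnz, o_nnz, and C are not part of the interface),
   the call on proc0 of the example above, using the per-row counts rather than d_nz/o_nz, could look like

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      C;

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&C);
     /* fill C with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() as usual */
.ve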
4556 4557 Level: intermediate 4558 4559 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4560 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4561 @*/ 4562 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4563 { 4564 PetscErrorCode ierr; 4565 PetscMPIInt size; 4566 4567 PetscFunctionBegin; 4568 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4569 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4571 if (size > 1) { 4572 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4573 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4574 } else { 4575 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4576 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4577 } 4578 PetscFunctionReturn(0); 4579 } 4580 4581 /*@C 4582 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4583 4584 Not Collective 4585 4586 Input Parameter: 4587 . A - The MPIAIJ matrix 4588 4589 Output Parameters: 4590 + Ad - The local diagonal block as a SeqAIJ matrix 4591 . Ao - The local off-diagonal block as a SeqAIJ matrix 4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4593 4594 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4595 in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4596 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4597 local column numbers to global column numbers in the original matrix.
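
   A minimal usage sketch (error checking omitted; the variable names are illustrative only):

.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column j of Ao corresponds to global column colmap[j] of the parallel matrix */
.ve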
4598 4599 Level: intermediate 4600 4601 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4602 @*/ 4603 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4604 { 4605 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4606 PetscBool flg; 4607 PetscErrorCode ierr; 4608 4609 PetscFunctionBegin; 4610 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4611 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4612 if (Ad) *Ad = a->A; 4613 if (Ao) *Ao = a->B; 4614 if (colmap) *colmap = a->garray; 4615 PetscFunctionReturn(0); 4616 } 4617 4618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4619 { 4620 PetscErrorCode ierr; 4621 PetscInt m,N,i,rstart,nnz,Ii; 4622 PetscInt *indx; 4623 PetscScalar *values; 4624 4625 PetscFunctionBegin; 4626 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4627 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4628 PetscInt *dnz,*onz,sum,bs,cbs; 4629 4630 if (n == PETSC_DECIDE) { 4631 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4632 } 4633 /* Check sum(n) = N */ 4634 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4635 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4636 4637 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4638 rstart -= m; 4639 4640 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4641 for (i=0; i<m; i++) { 4642 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4643 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4644 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4645 } 4646 4647 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4648 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4649 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4650 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4651 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4652 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4653 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4654 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4655 } 4656 4657 /* numeric phase */ 4658 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4659 for (i=0; i<m; i++) { 4660 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4661 Ii = i + rstart; 4662 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4663 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4664 } 4665 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4666 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4667 PetscFunctionReturn(0); 4668 } 4669 4670 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4671 { 4672 PetscErrorCode ierr; 4673 PetscMPIInt rank; 4674 PetscInt m,N,i,rstart,nnz; 4675 size_t len; 4676 const PetscInt *indx; 4677 PetscViewer out; 4678 char *name; 4679 Mat B; 4680 const PetscScalar *values; 4681 4682 PetscFunctionBegin; 4683 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4684 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4685 /* Should this be the type of the diagonal block of A? 
*/ 4686 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4687 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4688 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4689 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4690 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4691 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4692 for (i=0; i<m; i++) { 4693 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4694 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4695 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4696 } 4697 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 4700 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4701 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4702 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4703 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4704 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4705 ierr = PetscFree(name);CHKERRQ(ierr); 4706 ierr = MatView(B,out);CHKERRQ(ierr); 4707 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4708 ierr = MatDestroy(&B);CHKERRQ(ierr); 4709 PetscFunctionReturn(0); 4710 } 4711 4712 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4713 { 4714 PetscErrorCode ierr; 4715 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4716 4717 PetscFunctionBegin; 4718 if (!merge) PetscFunctionReturn(0); 4719 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4720 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4721 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4722 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4723 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4724 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4725 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4726 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4727 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4728 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4729 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4730 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4731 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4732 ierr = PetscFree(merge);CHKERRQ(ierr); 4733 PetscFunctionReturn(0); 4734 } 4735 4736 #include <../src/mat/utils/freespace.h> 4737 #include <petscbt.h> 4738 4739 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4740 { 4741 PetscErrorCode ierr; 4742 MPI_Comm comm; 4743 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4744 PetscMPIInt size,rank,taga,*len_s; 4745 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4746 PetscInt proc,m; 4747 PetscInt **buf_ri,**buf_rj; 4748 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4749 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4750 MPI_Request *s_waits,*r_waits; 4751 MPI_Status *status; 4752 MatScalar *aa=a->a; 4753 MatScalar **abuf_r,*ba_i; 4754 Mat_Merge_SeqsToMPI *merge; 4755 PetscContainer container; 4756 4757 PetscFunctionBegin; 4758 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4759 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4760 4761 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4762 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4763 4764 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4765 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4766 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4767 4768 bi = merge->bi; 4769 bj = merge->bj; 4770 buf_ri = merge->buf_ri; 4771 buf_rj = merge->buf_rj; 4772 4773 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4774 owners = merge->rowmap->range; 4775 len_s = merge->len_s; 4776 4777 /* send and recv matrix values */ 4778 /*-----------------------------*/ 4779 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4780 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4781 4782 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4783 for (proc=0,k=0; proc<size; proc++) { 4784 if (!len_s[proc]) continue; 4785 i = owners[proc]; 4786 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4787 k++; 4788 } 4789 4790 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4791 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4792 ierr = PetscFree(status);CHKERRQ(ierr); 4793 4794 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4795 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4796 4797 /* insert mat values of mpimat */ 4798 /*----------------------------*/ 4799 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4800 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4801 4802 for (k=0; k<merge->nrecv; k++) { 4803 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4804 nrows = *(buf_ri_k[k]); 4805 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4806 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4807 } 4808 4809 /* set values of ba */ 4810 m = merge->rowmap->n; 4811 for (i=0; i<m; i++) { 4812 arow = owners[rank] + i; 4813 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4814 bnzi = bi[i+1] - bi[i]; 4815 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4816 4817 /* add local non-zero vals of this proc's seqmat into ba */ 4818 anzi = ai[arow+1] - ai[arow]; 4819 aj = a->j + ai[arow]; 4820 aa = a->a + ai[arow]; 4821 nextaj = 0; 4822 for (j=0; nextaj<anzi; j++) { 4823 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4824 ba_i[j] += aa[nextaj++]; 4825 } 4826 } 4827 4828 /* add received vals into ba */ 4829 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4830 /* i-th row */ 4831 if (i == *nextrow[k]) { 4832 anzi = *(nextai[k]+1) - *nextai[k]; 4833 aj = buf_rj[k] + *(nextai[k]); 4834 aa = abuf_r[k] + *(nextai[k]); 4835 nextaj = 0; 4836 for (j=0; nextaj<anzi; j++) { 4837 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4838 ba_i[j] += aa[nextaj++]; 4839 } 4840 } 4841 nextrow[k]++; nextai[k]++; 4842 } 4843 } 4844 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4845 } 4846 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4847 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4848 4849 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4850 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4851 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4852 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4853 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4854 PetscFunctionReturn(0); 4855 } 4856 4857 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4858 { 4859 PetscErrorCode ierr; 4860 Mat B_mpi; 4861 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4862 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4863 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4864 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4865 PetscInt len,proc,*dnz,*onz,bs,cbs; 4866 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4867 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4868 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4869 MPI_Status *status; 4870 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4871 PetscBT lnkbt; 4872 Mat_Merge_SeqsToMPI *merge; 4873 PetscContainer container; 4874 4875 PetscFunctionBegin; 4876 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4877 4878 /* make sure it is a PETSc comm */ 4879 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4880 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4881 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4882 4883 ierr = PetscNew(&merge);CHKERRQ(ierr); 4884 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4885 4886 /* determine row ownership */ 4887 /*---------------------------------------------------------*/ 4888 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4889 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4890 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4891 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4892 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4893 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4894 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4895 4896 m = merge->rowmap->n; 4897 owners = merge->rowmap->range; 4898 4899 /* determine the number of messages to send, their lengths */ 4900 /*---------------------------------------------------------*/ 4901 len_s = merge->len_s; 4902 4903 len = 0; /* length of buf_si[] */ 4904 merge->nsend = 0; 4905 for (proc=0; proc<size; proc++) { 4906 len_si[proc] = 0; 4907 if (proc == rank) { 4908 len_s[proc] = 0; 4909 } else { 4910 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4911 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4912 } 4913 if (len_s[proc]) { 4914 merge->nsend++; 4915 nrows = 0; 4916 for (i=owners[proc]; i<owners[proc+1]; i++) { 4917 if (ai[i+1] > ai[i]) nrows++; 4918 } 4919 len_si[proc] = 2*(nrows+1); 4920 len += len_si[proc]; 4921 } 4922 } 4923 4924 /* determine the number and length of messages to receive for ij-structure */ 4925 /*-------------------------------------------------------------------------*/ 4926 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4927 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4928 4929 /* post the Irecv of j-structure */ 4930 /*-------------------------------*/ 4931 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4932 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4933 4934 /* post the Isend of j-structure */ 4935 /*--------------------------------*/ 4936 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4937 4938 for (proc=0, k=0; proc<size; proc++) { 4939 if (!len_s[proc]) continue; 4940 i = owners[proc]; 4941 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4942 k++; 4943 } 4944 4945 /* receives and sends of j-structure are complete */ 4946 /*------------------------------------------------*/ 
4947 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4948 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4949 4950 /* send and recv i-structure */ 4951 /*---------------------------*/ 4952 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4953 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4954 4955 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4956 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4957 for (proc=0,k=0; proc<size; proc++) { 4958 if (!len_s[proc]) continue; 4959 /* form outgoing message for i-structure: 4960 buf_si[0]: nrows to be sent 4961 [1:nrows]: row index (global) 4962 [nrows+1:2*nrows+1]: i-structure index 4963 */ 4964 /*-------------------------------------------*/ 4965 nrows = len_si[proc]/2 - 1; 4966 buf_si_i = buf_si + nrows+1; 4967 buf_si[0] = nrows; 4968 buf_si_i[0] = 0; 4969 nrows = 0; 4970 for (i=owners[proc]; i<owners[proc+1]; i++) { 4971 anzi = ai[i+1] - ai[i]; 4972 if (anzi) { 4973 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4974 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4975 nrows++; 4976 } 4977 } 4978 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4979 k++; 4980 buf_si += len_si[proc]; 4981 } 4982 4983 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4984 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4985 4986 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4987 for (i=0; i<merge->nrecv; i++) { 4988 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4989 } 4990 4991 ierr = PetscFree(len_si);CHKERRQ(ierr); 4992 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4993 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4994 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4995 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4996 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4997 ierr = PetscFree(status);CHKERRQ(ierr); 4998 4999 /* compute a local seq matrix in each processor */ 5000 /*----------------------------------------------*/ 5001 /* allocate bi array and free space for accumulating nonzero column info */ 5002 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 5003 bi[0] = 0; 5004 5005 /* create and initialize a linked list */ 5006 nlnk = N+1; 5007 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5008 5009 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5010 len = ai[owners[rank+1]] - ai[owners[rank]]; 5011 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 5012 5013 current_space = free_space; 5014 5015 /* determine symbolic info for each local row */ 5016 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 5017 5018 for (k=0; k<merge->nrecv; k++) { 5019 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5020 nrows = *buf_ri_k[k]; 5021 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5022 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5023 } 5024 5025 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5026 len = 0; 5027 for (i=0; i<m; i++) { 5028 bnzi = 0; 5029 /* add local non-zero cols of this proc's seqmat into lnk */ 5030 arow = owners[rank] + i; 5031 anzi = 
ai[arow+1] - ai[arow]; 5032 aj = a->j + ai[arow]; 5033 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5034 bnzi += nlnk; 5035 /* add received col data into lnk */ 5036 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5037 if (i == *nextrow[k]) { /* i-th row */ 5038 anzi = *(nextai[k]+1) - *nextai[k]; 5039 aj = buf_rj[k] + *nextai[k]; 5040 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5041 bnzi += nlnk; 5042 nextrow[k]++; nextai[k]++; 5043 } 5044 } 5045 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5046 5047 /* if free space is not available, make more free space */ 5048 if (current_space->local_remaining<bnzi) { 5049 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 5050 nspacedouble++; 5051 } 5052 /* copy data into free space, then initialize lnk */ 5053 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5054 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5055 5056 current_space->array += bnzi; 5057 current_space->local_used += bnzi; 5058 current_space->local_remaining -= bnzi; 5059 5060 bi[i+1] = bi[i] + bnzi; 5061 } 5062 5063 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5064 5065 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5066 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5067 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5068 5069 /* create symbolic parallel matrix B_mpi */ 5070 /*---------------------------------------*/ 5071 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5072 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5073 if (n==PETSC_DECIDE) { 5074 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5075 } else { 5076 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5077 } 5078 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5079 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5080 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5081 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5082 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5083 5084 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5085 B_mpi->assembled = PETSC_FALSE; 5086 merge->bi = bi; 5087 merge->bj = bj; 5088 merge->buf_ri = buf_ri; 5089 merge->buf_rj = buf_rj; 5090 merge->coi = NULL; 5091 merge->coj = NULL; 5092 merge->owners_co = NULL; 5093 5094 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5095 5096 /* attach the supporting struct to B_mpi for reuse */ 5097 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5098 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5099 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 5100 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5101 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5102 *mpimat = B_mpi; 5103 5104 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 5108 /*@C 5109 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5110 matrices from each processor 5111 5112 Collective 5113 5114 Input Parameters: 5115 + comm - the communicator the parallel matrix will live on 5116 . seqmat - the input sequential matrices 5117 . m - number of local rows (or PETSC_DECIDE) 5118 .
n - number of local columns (or PETSC_DECIDE) 5119 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5120 5121 Output Parameter: 5122 . mpimat - the parallel matrix generated 5123 5124 Level: advanced 5125 5126 Notes: 5127 The dimensions of the sequential matrix in each processor MUST be the same. 5128 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5129 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5130 @*/ 5131 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5132 { 5133 PetscErrorCode ierr; 5134 PetscMPIInt size; 5135 5136 PetscFunctionBegin; 5137 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5138 if (size == 1) { 5139 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5140 if (scall == MAT_INITIAL_MATRIX) { 5141 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5142 } else { 5143 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5144 } 5145 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5146 PetscFunctionReturn(0); 5147 } 5148 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5149 if (scall == MAT_INITIAL_MATRIX) { 5150 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5151 } 5152 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5153 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5154 PetscFunctionReturn(0); 5155 } 5156 5157 /*@ 5158 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5159 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5160 with MatGetSize() 5161 5162 Not Collective 5163 5164 Input Parameters: 5165 + A - the matrix 5166 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5167 5168 Output Parameter: 5169 . A_loc - the local sequential matrix generated 5170 5171 Level: developer 5172 5173 Notes: 5174 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5175 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5176 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5177 modify the values of the returned A_loc. 
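
   A typical calling sequence is sketched below (error checking omitted; A_loc is an illustrative name):

.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* creates A_loc */
     /* ... the numerical values of A change ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* refreshes the values in A_loc */
     MatDestroy(&A_loc);
.ve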
5178 5179 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5180 5181 @*/ 5182 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5183 { 5184 PetscErrorCode ierr; 5185 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5186 Mat_SeqAIJ *mat,*a,*b; 5187 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5188 MatScalar *aa,*ba,*cam; 5189 PetscScalar *ca; 5190 PetscMPIInt size; 5191 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5192 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5193 PetscBool match; 5194 5195 PetscFunctionBegin; 5196 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5197 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5198 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 5199 if (size == 1) { 5200 if (scall == MAT_INITIAL_MATRIX) { 5201 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 5202 *A_loc = mpimat->A; 5203 } else if (scall == MAT_REUSE_MATRIX) { 5204 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5205 } 5206 PetscFunctionReturn(0); 5207 } 5208 5209 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5210 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5211 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5212 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5213 aa = a->a; ba = b->a; 5214 if (scall == MAT_INITIAL_MATRIX) { 5215 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5216 ci[0] = 0; 5217 for (i=0; i<am; i++) { 5218 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5219 } 5220 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5221 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5222 k = 0; 5223 for (i=0; i<am; i++) { 5224 ncols_o = bi[i+1] - bi[i]; 5225 ncols_d = ai[i+1] - ai[i]; 5226 /* off-diagonal portion of A */ 5227 for (jo=0; jo<ncols_o; jo++) { 5228 col = cmap[*bj]; 5229 if (col >= cstart) break; 5230 cj[k] = col; bj++; 5231 ca[k++] = *ba++; 5232 } 5233 /* diagonal portion of A */ 5234 for (j=0; j<ncols_d; j++) { 5235 cj[k] = cstart + *aj++; 5236 ca[k++] = *aa++; 5237 } 5238 /* off-diagonal portion of A */ 5239 for (j=jo; j<ncols_o; j++) { 5240 cj[k] = cmap[*bj++]; 5241 ca[k++] = *ba++; 5242 } 5243 } 5244 /* put together the new matrix */ 5245 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5246 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5247 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5248 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5249 mat->free_a = PETSC_TRUE; 5250 mat->free_ij = PETSC_TRUE; 5251 mat->nonew = 0; 5252 } else if (scall == MAT_REUSE_MATRIX) { 5253 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5254 ci = mat->i; cj = mat->j; cam = mat->a; 5255 for (i=0; i<am; i++) { 5256 /* off-diagonal portion of A */ 5257 ncols_o = bi[i+1] - bi[i]; 5258 for (jo=0; jo<ncols_o; jo++) { 5259 col = cmap[*bj]; 5260 if (col >= cstart) break; 5261 *cam++ = *ba++; bj++; 5262 } 5263 /* diagonal portion of A */ 5264 ncols_d = ai[i+1] - ai[i]; 5265 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5266 /* off-diagonal portion of A */ 5267 for (j=jo; j<ncols_o; j++) { 5268 *cam++ = *ba++; bj++; 5269 } 5270 } 5271 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5272 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5273 PetscFunctionReturn(0); 5274 } 5275 5276 /*@C 5277 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5278 5279 Not Collective 5280 5281 Input Parameters: 5282 + A - the matrix 5283 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5284 - row, col - index sets of rows and columns to extract (or NULL) 5285 5286 Output Parameter: 5287 . A_loc - the local sequential matrix generated 5288 5289 Level: developer 5290 5291 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5292 5293 @*/ 5294 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5295 { 5296 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5297 PetscErrorCode ierr; 5298 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5299 IS isrowa,iscola; 5300 Mat *aloc; 5301 PetscBool match; 5302 5303 PetscFunctionBegin; 5304 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5305 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5306 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5307 if (!row) { 5308 start = A->rmap->rstart; end = A->rmap->rend; 5309 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5310 } else { 5311 isrowa = *row; 5312 } 5313 if (!col) { 5314 start = A->cmap->rstart; 5315 cmap = a->garray; 5316 nzA = a->A->cmap->n; 5317 nzB = a->B->cmap->n; 5318 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5319 ncols = 0; 5320 for (i=0; i<nzB; i++) { 5321 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5322 else break; 5323 } 5324 imark = i; 5325 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5326 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5327 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5328 } else { 5329 iscola = *col; 5330 } 5331 if (scall != MAT_INITIAL_MATRIX) { 5332 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5333 aloc[0] = *A_loc; 5334 } 5335 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5336 if (!col) { /* attach global id of condensed columns */ 5337 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5338 } 5339 *A_loc = aloc[0]; 5340 ierr = PetscFree(aloc);CHKERRQ(ierr); 5341 if (!row) { 5342 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5343 } 5344 if (!col) { 5345 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5346 } 5347 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5348 PetscFunctionReturn(0); 5349 } 5350 5351 /* 5352 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5353 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5354 * on a global size. 5355 * */ 5356 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5357 { 5358 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5359 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5360 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5361 PetscMPIInt owner; 5362 PetscSFNode *iremote,*oiremote; 5363 const PetscInt *lrowindices; 5364 PetscErrorCode ierr; 5365 PetscSF sf,osf; 5366 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5367 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5368 MPI_Comm comm; 5369 ISLocalToGlobalMapping mapping; 5370 5371 PetscFunctionBegin; 5372 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5373 /* plocalsize is the number of roots 5374 * nrows is the number of leaves 5375 * */ 5376 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5377 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5378 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5379 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5380 for (i=0;i<nrows;i++) { 5381 /* Find a remote index and an owner for a row 5382 * The row could be local or remote 5383 * */ 5384 owner = 0; 5385 lidx = 0; 5386 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5387 iremote[i].index = lidx; 5388 iremote[i].rank = owner; 5389 } 5390 /* Create SF to communicate how many nonzero columns for each row */ 5391 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5392 /* SF will figure out the number of nonzero colunms for each row, and their 5393 * offsets 5394 * */ 5395 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5396 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5397 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5398 5399 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5400 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5401 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5402 roffsets[0] = 0; 5403 roffsets[1] = 0; 5404 for (i=0;i<plocalsize;i++) { 5405 /* diag */ 5406 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5407 /* off diag */ 5408 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5409 /* compute offsets so that we relative location for each row */ 5410 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5411 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5412 } 5413 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5414 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5415 /* 'r' means root, and 'l' means leaf */ 5416 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5417 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5418 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5419 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5420 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5421 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5422 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5423 dntotalcols = 0; 5424 ontotalcols = 0; 5425 ncol = 0; 5426 for (i=0;i<nrows;i++) { 5427 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5428 ncol = PetscMax(pnnz[i],ncol); 5429 /* diag */ 5430 dntotalcols += nlcols[i*2+0]; 5431 /* off diag */ 5432 ontotalcols += nlcols[i*2+1]; 5433 } 5434 /* We do not need to figure the right number of columns 5435 * since all the 
calculations will be done by going through the raw data 5436 * */ 5437 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5438 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5439 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5440 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5441 /* diag */ 5442 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5443 /* off diag */ 5444 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5445 /* diag */ 5446 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5447 /* off diag */ 5448 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5449 dntotalcols = 0; 5450 ontotalcols = 0; 5451 ntotalcols = 0; 5452 for (i=0;i<nrows;i++) { 5453 owner = 0; 5454 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5455 /* Set iremote for diag matrix */ 5456 for (j=0;j<nlcols[i*2+0];j++) { 5457 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5458 iremote[dntotalcols].rank = owner; 5459 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5460 ilocal[dntotalcols++] = ntotalcols++; 5461 } 5462 /* off diag */ 5463 for (j=0;j<nlcols[i*2+1];j++) { 5464 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5465 oiremote[ontotalcols].rank = owner; 5466 oilocal[ontotalcols++] = ntotalcols++; 5467 } 5468 } 5469 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5470 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5471 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5472 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5473 /* P serves as roots and P_oth is leaves 5474 * Diag matrix 5475 * */ 5476 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5477 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5478 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5479 5480 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5481 /* Off diag */ 5482 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5483 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5484 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5485 /* We operate on the matrix internal data for saving memory */ 5486 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5487 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5488 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5489 /* Convert to global indices for diag matrix */ 5490 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5491 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5492 /* We want P_oth store global indices */ 5493 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5494 /* Use memory scalable approach */ 5495 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5496 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5497 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5498 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5499 /* Convert back to local indices */ 5500 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5501 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5502 nout = 0; 5503 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5504 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5505 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5506 /* Exchange values */ 5507 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5508 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5509 /* Stop PETSc from shrinking memory */ 5510 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5511 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5512 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5513 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5514 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5515 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5516 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5517 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5518 PetscFunctionReturn(0); 5519 } 5520 5521 /* 5522 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5523 * This supports MPIAIJ and MAIJ 5524 * */ 5525 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5526 { 5527 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5528 Mat_SeqAIJ *p_oth; 5529 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5530 IS rows,map; 5531 PetscHMapI hamp; 5532 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5533 MPI_Comm comm; 5534 PetscSF sf,osf; 5535 PetscBool has; 5536 PetscErrorCode ierr; 5537 5538 PetscFunctionBegin; 5539 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5540 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5541 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5542 * and then create a submatrix (that often is an overlapping matrix) 5543 * */ 5544 if (reuse == MAT_INITIAL_MATRIX) { 5545 /* Use a hash table to figure out unique keys */ 5546 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5547 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5548 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5549 count = 0; 5550 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5551 for (i=0;i<a->B->cmap->n;i++) { 5552 key = a->garray[i]/dof; 5553 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5554 if (!has) { 5555 mapping[i] = count; 5556 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5557 } else { 5558 /* Current 'i' has the same value the previous step */ 5559 mapping[i] = count-1; 5560 } 5561 } 5562 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5563 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5564 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5565 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5566 off = 0; 5567 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5568 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5569 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5570 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5571 /* In case, the matrix was already created but users want to recreate the matrix */ 5572 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5573 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5574 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5575 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5576 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5577 } else if (reuse == MAT_REUSE_MATRIX) { 5578 /* If matrix was already created, we simply update values using SF objects 5579 * that as attached to the matrix ealier. 5580 * */ 5581 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5582 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5583 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5584 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5585 /* Update values in place */ 5586 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5587 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5588 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5589 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5590 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5591 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5592 PetscFunctionReturn(0); 5593 } 5594 5595 /*@C 5596 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5597 5598 Collective on Mat 5599 5600 Input Parameters: 5601 + A,B - the matrices in mpiaij format 5602 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5603 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5604 5605 Output Parameter: 5606 + rowb, colb - index sets of rows and columns of B to extract 5607 - B_seq - the sequential matrix generated 5608 5609 Level: developer 5610 5611 @*/ 5612 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5613 { 5614 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5615 PetscErrorCode ierr; 5616 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5617 IS isrowb,iscolb; 5618 Mat *bseq=NULL; 5619 5620 PetscFunctionBegin; 5621 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5622 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5623 } 5624 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5625 5626 if (scall == MAT_INITIAL_MATRIX) { 5627 start = A->cmap->rstart; 5628 cmap = a->garray; 5629 nzA = a->A->cmap->n; 5630 nzB = a->B->cmap->n; 5631 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5632 ncols = 0; 5633 for (i=0; i<nzB; i++) { /* row < local row index */ 5634 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5635 else break; 5636 } 5637 imark = i; 5638 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5639 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5640 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5641 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5642 } else { 5643 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5644 isrowb = *rowb; iscolb = *colb; 5645 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5646 bseq[0] = *B_seq; 5647 } 5648 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5649 *B_seq = bseq[0]; 5650 ierr = PetscFree(bseq);CHKERRQ(ierr); 5651 if (!rowb) { 5652 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5653 } else { 5654 *rowb = isrowb; 5655 } 5656 if (!colb) { 5657 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5658 } 
else { 5659 *colb = iscolb; 5660 } 5661 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5662 PetscFunctionReturn(0); 5663 } 5664 5665 /* 5666 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5667 of the OFF-DIAGONAL portion of local A 5668 5669 Collective on Mat 5670 5671 Input Parameters: 5672 + A,B - the matrices in mpiaij format 5673 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5674 5675 Output Parameter: 5676 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5677 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5678 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5679 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5680 5681 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5682 for this matrix. This is not desirable.. 5683 5684 Level: developer 5685 5686 */ 5687 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5688 { 5689 PetscErrorCode ierr; 5690 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5691 Mat_SeqAIJ *b_oth; 5692 VecScatter ctx; 5693 MPI_Comm comm; 5694 const PetscMPIInt *rprocs,*sprocs; 5695 const PetscInt *srow,*rstarts,*sstarts; 5696 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5697 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5698 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5699 MPI_Request *rwaits = NULL,*swaits = NULL; 5700 MPI_Status rstatus; 5701 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5702 5703 PetscFunctionBegin; 5704 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5705 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5706 5707 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5708 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5709 } 5710 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5711 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5712 5713 if (size == 1) { 5714 startsj_s = NULL; 5715 bufa_ptr = NULL; 5716 *B_oth = NULL; 5717 PetscFunctionReturn(0); 5718 } 5719 5720 ctx = a->Mvctx; 5721 tag = ((PetscObject)ctx)->tag; 5722 5723 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5724 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5725 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5726 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5727 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5728 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5729 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5730 5731 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5732 if (scall == MAT_INITIAL_MATRIX) { 5733 /* i-array */ 5734 /*---------*/ 5735 /* post receives */ 5736 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be 
NULL when nrecvs=0 */ 5737 for (i=0; i<nrecvs; i++) { 5738 rowlen = rvalues + rstarts[i]*rbs; 5739 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5740 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5741 } 5742 5743 /* pack the outgoing message */ 5744 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5745 5746 sstartsj[0] = 0; 5747 rstartsj[0] = 0; 5748 len = 0; /* total length of j or a array to be sent */ 5749 if (nsends) { 5750 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5751 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5752 } 5753 for (i=0; i<nsends; i++) { 5754 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5755 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5756 for (j=0; j<nrows; j++) { 5757 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5758 for (l=0; l<sbs; l++) { 5759 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5760 5761 rowlen[j*sbs+l] = ncols; 5762 5763 len += ncols; 5764 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5765 } 5766 k++; 5767 } 5768 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5769 5770 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5771 } 5772 /* recvs and sends of i-array are completed */ 5773 i = nrecvs; 5774 while (i--) { 5775 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5776 } 5777 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5778 ierr = PetscFree(svalues);CHKERRQ(ierr); 5779 5780 /* allocate buffers for sending j and a arrays */ 5781 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5782 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5783 5784 /* create i-array of B_oth */ 5785 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5786 5787 b_othi[0] = 0; 5788 len = 0; /* total length of j or a array to be received */ 5789 k = 0; 5790 for (i=0; i<nrecvs; i++) { 5791 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5792 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5793 for (j=0; j<nrows; j++) { 5794 b_othi[k+1] = b_othi[k] + rowlen[j]; 5795 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5796 k++; 5797 } 5798 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5799 } 5800 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5801 5802 /* allocate space for j and a arrrays of B_oth */ 5803 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5804 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5805 5806 /* j-array */ 5807 /*---------*/ 5808 /* post receives of j-array */ 5809 for (i=0; i<nrecvs; i++) { 5810 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5811 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5812 } 5813 5814 /* pack the outgoing message j-array */ 5815 if (nsends) k = sstarts[0]; 5816 for (i=0; i<nsends; i++) { 5817 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5818 bufJ = bufj+sstartsj[i]; 5819 for (j=0; j<nrows; j++) { 5820 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5821 for (ll=0; ll<sbs; ll++) { 5822 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5823 for (l=0; l<ncols; l++) { 5824 *bufJ++ = cols[l]; 5825 } 5826 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse value must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller in this case, so free it here */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.
colmap - A map from global column index to local index into lvec 5918 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5919 5920 Level: developer 5921 5922 @*/ 5923 #if defined(PETSC_USE_CTABLE) 5924 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5925 #else 5926 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5927 #endif 5928 { 5929 Mat_MPIAIJ *a; 5930 5931 PetscFunctionBegin; 5932 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5933 PetscValidPointer(lvec, 2); 5934 PetscValidPointer(colmap, 3); 5935 PetscValidPointer(multScatter, 4); 5936 a = (Mat_MPIAIJ*) A->data; 5937 if (lvec) *lvec = a->lvec; 5938 if (colmap) *colmap = a->colmap; 5939 if (multScatter) *multScatter = a->Mvctx; 5940 PetscFunctionReturn(0); 5941 } 5942 5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5944 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5946 #if defined(PETSC_HAVE_MKL_SPARSE) 5947 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5948 #endif 5949 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5951 #if defined(PETSC_HAVE_ELEMENTAL) 5952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5953 #endif 5954 #if defined(PETSC_HAVE_SCALAPACK) 5955 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5956 #endif 5957 #if defined(PETSC_HAVE_HYPRE) 5958 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5959 #endif 5960 #if defined(PETSC_HAVE_CUDA) 5961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5962 #endif 5963 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5965 #endif 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5967 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5968 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5969 5970 /* 5971 Computes (B'*A')' since computing B*A directly is untenable 5972 5973 n p p 5974 [ ] [ ] [ ] 5975 m [ A ] * n [ B ] = m [ C ] 5976 [ ] [ ] [ ] 5977 5978 */ 5979 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5980 { 5981 PetscErrorCode ierr; 5982 Mat At,Bt,Ct; 5983 5984 PetscFunctionBegin; 5985 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5986 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5987 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5988 ierr = MatDestroy(&At);CHKERRQ(ierr); 5989 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5990 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5991 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5992 PetscFunctionReturn(0); 5993 } 5994 5995 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5996 { 5997 PetscErrorCode ierr; 5998 PetscBool cisdense; 5999 6000 PetscFunctionBegin; 6001 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 6002 ierr = 
MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off-process entries formed during assembly */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6121 #if defined(PETSC_HAVE_ELEMENTAL) 6122 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6123 #endif 6124 #if defined(PETSC_HAVE_SCALAPACK) 6125 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6126 #endif 6127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6128 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6129 #if defined(PETSC_HAVE_HYPRE) 6130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6131 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6132 #endif 6133 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6134 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6135 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6136 PetscFunctionReturn(0); 6137 } 6138 6139 /*@C 6140 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6141 and "off-diagonal" part of the matrix in CSR format. 6142 6143 Collective 6144 6145 Input Parameters: 6146 + comm - MPI communicator 6147 . m - number of local rows (Cannot be PETSC_DECIDE) 6148 . n - This value should be the same as the local size used in creating the 6149 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6150 calculated if N is given) For square matrices n is almost always m. 6151 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6152 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6153 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6154 . j - column indices 6155 . a - matrix values 6156 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6157 . oj - column indices 6158 - oa - matrix values 6159 6160 Output Parameter: 6161 . mat - the matrix 6162 6163 Level: advanced 6164 6165 Notes: 6166 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6167 must free the arrays once the matrix has been destroyed and not before. 6168 6169 The i and j indices are 0 based 6170 6171 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6172 6173 This sets local rows and cannot be used to set off-processor values. 6174 6175 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6176 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6177 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6178 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6179 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6180 communication if it is known that only local entries will be set. 6181 6182 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6183 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6184 @*/ 6185 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6186 { 6187 PetscErrorCode ierr; 6188 Mat_MPIAIJ *maij; 6189 6190 PetscFunctionBegin; 6191 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6192 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6193 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6194 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6195 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6196 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6197 maij = (Mat_MPIAIJ*) (*mat)->data; 6198 6199 (*mat)->preallocated = PETSC_TRUE; 6200 6201 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6202 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6203 6204 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6205 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6206 6207 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6208 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6209 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6210 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6211 6212 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6213 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6214 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6215 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6216 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6217 PetscFunctionReturn(0); 6218 } 6219 6220 /* 6221 Special version for direct calls from Fortran 6222 */ 6223 #include <petsc/private/fortranimpl.h> 6224 6225 /* Change these macros so can be used in void function */ 6226 #undef CHKERRQ 6227 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6228 #undef SETERRQ2 6229 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6230 #undef SETERRQ3 6231 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6232 #undef SETERRQ 6233 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6234 6235 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6236 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6237 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6238 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6239 #else 6240 #endif 6241 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6242 { 6243 Mat mat = *mmat; 6244 PetscInt m = *mm, n = *mn; 6245 InsertMode addv = *maddv; 6246 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6247 PetscScalar value; 6248 
PetscErrorCode ierr; 6249 6250 MatCheckPreallocated(mat,1); 6251 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6252 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6253 { 6254 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6255 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6256 PetscBool roworiented = aij->roworiented; 6257 6258 /* Some Variables required in the macro */ 6259 Mat A = aij->A; 6260 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6261 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6262 MatScalar *aa = a->a; 6263 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6264 Mat B = aij->B; 6265 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6266 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6267 MatScalar *ba = b->a; 6268 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6269 * cannot use "#if defined" inside a macro. */ 6270 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6271 6272 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6273 PetscInt nonew = a->nonew; 6274 MatScalar *ap1,*ap2; 6275 6276 PetscFunctionBegin; 6277 for (i=0; i<m; i++) { 6278 if (im[i] < 0) continue; 6279 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6280 if (im[i] >= rstart && im[i] < rend) { 6281 row = im[i] - rstart; 6282 lastcol1 = -1; 6283 rp1 = aj + ai[row]; 6284 ap1 = aa + ai[row]; 6285 rmax1 = aimax[row]; 6286 nrow1 = ailen[row]; 6287 low1 = 0; 6288 high1 = nrow1; 6289 lastcol2 = -1; 6290 rp2 = bj + bi[row]; 6291 ap2 = ba + bi[row]; 6292 rmax2 = bimax[row]; 6293 nrow2 = bilen[row]; 6294 low2 = 0; 6295 high2 = nrow2; 6296 6297 for (j=0; j<n; j++) { 6298 if (roworiented) value = v[i*n+j]; 6299 else value = v[i+j*m]; 6300 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6301 if (in[j] >= cstart && in[j] < cend) { 6302 col = in[j] - cstart; 6303 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6304 #if defined(PETSC_HAVE_DEVICE) 6305 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6306 #endif 6307 } else if (in[j] < 0) continue; 6308 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6309 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6310 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6311 } else { 6312 if (mat->was_assembled) { 6313 if (!aij->colmap) { 6314 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6315 } 6316 #if defined(PETSC_USE_CTABLE) 6317 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6318 col--; 6319 #else 6320 col = aij->colmap[in[j]] - 1; 6321 #endif 6322 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6323 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6324 col = in[j]; 6325 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6326 B = aij->B; 6327 b = (Mat_SeqAIJ*)B->data; 6328 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6329 rp2 = bj + bi[row]; 6330 ap2 = ba + bi[row]; 6331 rmax2 = bimax[row]; 6332 nrow2 = bilen[row]; 6333 low2 = 
0; 6334 high2 = nrow2; 6335 bm = aij->B->rmap->n; 6336 ba = b->a; 6337 inserted = PETSC_FALSE; 6338 } 6339 } else col = in[j]; 6340 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6341 #if defined(PETSC_HAVE_DEVICE) 6342 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6343 #endif 6344 } 6345 } 6346 } else if (!aij->donotstash) { 6347 if (roworiented) { 6348 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6349 } else { 6350 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6351 } 6352 } 6353 } 6354 } 6355 PetscFunctionReturnVoid(); 6356 } 6357
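
/*
   Illustrative usage sketch (not part of the library source): the commented-out program below shows one way a
   caller might use MatCreateMPIAIJWithSplitArrays(), documented earlier in this file, to assemble a small
   MATMPIAIJ matrix from pre-split CSR arrays.  It is a minimal sketch under the assumption of exactly two MPI
   ranks; the 4x4 tridiagonal matrix, the array names, the help string, and main() itself are hypothetical and
   only for illustration.  The "diagonal" block column indices j[] are local to the diagonal block, while the
   "off-diagonal" block column indices oj[] are global, following the definitions in MatCreateAIJ().  For new
   code, MatCreateAIJ() followed by MatSetValues() remains the recommended assembly path, as noted above.

   static char help[] = "Sketch: assemble a 2-rank MPIAIJ matrix from split CSR arrays.\n";

   #include <petscmat.h>

   int main(int argc,char **argv)
   {
     Mat            A;
     PetscMPIInt    rank,size;
     PetscErrorCode ierr;
     // rank 0 owns global rows 0-1 and global columns 0-1 of the diagonal block
     static PetscInt    i0[]  = {0,2,4},  j0[]  = {0,1,0,1};
     static PetscScalar a0[]  = {2,-1,-1,2};
     static PetscInt    oi0[] = {0,0,1},  oj0[] = {2};      // local row 1 couples to global column 2
     static PetscScalar oa0[] = {-1};
     // rank 1 owns global rows 2-3 and global columns 2-3 of the diagonal block
     static PetscInt    i1[]  = {0,2,4},  j1[]  = {0,1,0,1};
     static PetscScalar a1[]  = {2,-1,-1,2};
     static PetscInt    oi1[] = {0,1,1},  oj1[] = {1};      // local row 0 couples to global column 1
     static PetscScalar oa1[] = {-1};

     ierr = PetscInitialize(&argc,&argv,NULL,help);if (ierr) return ierr;
     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (size != 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This sketch requires exactly 2 MPI ranks");

     if (!rank) {
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i0,j0,a0,oi0,oj0,oa0,&A);CHKERRQ(ierr);
     } else {
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i1,j1,a1,oi1,oj1,oa1,&A);CHKERRQ(ierr);
     }
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

     // the split arrays are not copied, so they must outlive the matrix; static storage guarantees that here
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     ierr = PetscFinalize();
     return ierr;
   }
*/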