#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL and MATAIJCRL. The type also
    automatically switches over to using inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = NULL; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 
for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type 
== NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if (type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = 
MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column 
indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 
383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 
426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = 
PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; \ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal 
part */ 534 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. 
*/ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 658 } else { 659 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 660 } 661 } 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 /* 668 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 669 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
670 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 671 */ 672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 673 { 674 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 675 Mat A = aij->A; /* diagonal part of the matrix */ 676 Mat B = aij->B; /* offdiagonal part of the matrix */ 677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 678 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 679 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 680 PetscInt *ailen = a->ilen,*aj = a->j; 681 PetscInt *bilen = b->ilen,*bj = b->j; 682 PetscInt am = aij->A->rmap->n,j; 683 PetscInt diag_so_far = 0,dnz; 684 PetscInt offd_so_far = 0,onz; 685 686 PetscFunctionBegin; 687 /* Iterate over all rows of the matrix */ 688 for (j=0; j<am; j++) { 689 dnz = onz = 0; 690 /* Iterate over all non-zero columns of the current row */ 691 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 692 /* If column is in the diagonal */ 693 if (mat_j[col] >= cstart && mat_j[col] < cend) { 694 aj[diag_so_far++] = mat_j[col] - cstart; 695 dnz++; 696 } else { /* off-diagonal entries */ 697 bj[offd_so_far++] = mat_j[col]; 698 onz++; 699 } 700 } 701 ailen[j] = dnz; 702 bilen[j] = onz; 703 } 704 PetscFunctionReturn(0); 705 } 706 707 /* 708 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 709 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 710 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 711 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 712 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
713 */ 714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 715 { 716 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 717 Mat A = aij->A; /* diagonal part of the matrix */ 718 Mat B = aij->B; /* offdiagonal part of the matrix */ 719 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 720 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 721 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 722 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 723 PetscInt *ailen = a->ilen,*aj = a->j; 724 PetscInt *bilen = b->ilen,*bj = b->j; 725 PetscInt am = aij->A->rmap->n,j; 726 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 796 797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 803 PetscFunctionBegin; 804 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 805 806 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 807 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 808 ierr = PetscInfo2(aij->A,"Stash has %D entries, 
uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 813 { 814 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 816 PetscErrorCode ierr; 817 PetscMPIInt n; 818 PetscInt i,j,rstart,ncols,flg; 819 PetscInt *row,*col; 820 PetscBool other_disassembled; 821 PetscScalar *val; 822 823 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 824 825 PetscFunctionBegin; 826 if (!aij->donotstash && !mat->nooffprocentries) { 827 while (1) { 828 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 829 if (!flg) break; 830 831 for (i=0; i<n;) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for (j=i,rstart=row[j]; j<n; j++) { 834 if (row[j] != rstart) break; 835 } 836 if (j < n) ncols = j-i; 837 else ncols = n-i; 838 /* Now assemble all these values with a single function call */ 839 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 840 i = j; 841 } 842 } 843 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 844 } 845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 846 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 847 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 848 if (mat->boundtocpu) { 849 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 851 } 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. 
*/ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = NULL; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero 
diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input 
rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = 
MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if (PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 
PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 VecScatter Mvctx = a->Mvctx; 1135 1136 PetscFunctionBegin; 1137 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1138 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 
PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* do local part */ 1154 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1155 /* add partial results together */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1162 { 1163 MPI_Comm comm; 1164 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1165 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1166 IS Me,Notme; 1167 PetscErrorCode ierr; 1168 PetscInt M,N,first,last,*notme,i; 1169 PetscBool lf; 1170 PetscMPIInt size; 1171 1172 PetscFunctionBegin; 1173 /* Easy test: symmetric diagonal block */ 1174 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1175 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1176 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1177 if (!*f) PetscFunctionReturn(0); 1178 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1180 if (size == 1) PetscFunctionReturn(0); 1181 1182 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1183 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1184 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1186 for (i=0; i<first; i++) notme[i] = i; 1187 for (i=last; i<M; i++) notme[i-last+first] = i; 1188 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1189 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1190 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1191 Aoff = Aoffs[0]; 1192 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1193 Boff = Boffs[0]; 1194 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1195 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1197 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1199 ierr = PetscFree(notme);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1213 { 1214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* do nondiagonal part */ 1219 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1220 /* do local part */ 1221 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1222 /* add partial results together */ 1223 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is 
the 1230 diagonal block 1231 */ 1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1233 { 1234 PetscErrorCode ierr; 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 1237 PetscFunctionBegin; 1238 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1239 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1245 { 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1251 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1256 { 1257 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 #if defined(PETSC_USE_LOG) 1262 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1263 #endif 1264 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1266 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1268 #if defined(PETSC_USE_CTABLE) 1269 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1270 #else 1271 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1272 #endif 1273 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1274 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1275 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1276 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1277 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1278 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1279 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1280 1281 /* may be created by 
MatCreateMPIAIJSumSeqAIJSymbolic */ 1282 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1283 1284 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1294 #if defined(PETSC_HAVE_ELEMENTAL) 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1296 #endif 1297 #if defined(PETSC_HAVE_SCALAPACK) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 const PetscInt *garray = aij->garray; 1316 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1317 PetscInt *rowlens; 1318 PetscInt *colidxs; 1319 PetscScalar *matvals; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1324 1325 M = mat->rmap->N; 1326 N = mat->cmap->N; 1327 m = mat->rmap->n; 1328 rs = mat->rmap->rstart; 1329 cs = mat->cmap->rstart; 1330 nz = A->nz + B->nz; 1331 1332 /* write matrix header */ 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = M; header[2] = N; header[3] = nz; 1335 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1337 1338 /* fill in and store row lengths */ 1339 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1340 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1341 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1342 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1343 1344 /* fill in and store column indices */ 1345 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1346 for (cnt=0, i=0; i<m; i++) { 1347 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1348 if (garray[B->j[jb]] > cs) break; 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1352 colidxs[cnt++] = A->j[ja] + cs; 1353 for (; jb<B->i[i+1]; jb++) 1354 colidxs[cnt++] = garray[B->j[jb]]; 1355 } 1356 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1357 ierr = 
PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1358 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1359 1360 /* fill in and store nonzero values */ 1361 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1362 for (cnt=0, i=0; i<m; i++) { 1363 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1364 if (garray[B->j[jb]] > cs) break; 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1368 matvals[cnt++] = A->a[ja]; 1369 for (; jb<B->i[i+1]; jb++) 1370 matvals[cnt++] = B->a[jb]; 1371 } 1372 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1373 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1374 ierr = PetscFree(matvals);CHKERRQ(ierr); 1375 1376 /* write block size option to the viewer's .info file */ 1377 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #include <petscdraw.h> 1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1383 { 1384 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1385 PetscErrorCode ierr; 1386 PetscMPIInt rank = aij->rank,size = aij->size; 1387 PetscBool isdraw,iascii,isbinary; 1388 PetscViewer sviewer; 1389 PetscViewerFormat format; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1394 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 if (iascii) { 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1398 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1399 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1400 ierr = 
MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1401 for (i=0; i<(PetscInt)size; i++) { 1402 nmax = PetscMax(nmax,nz[i]); 1403 nmin = PetscMin(nmin,nz[i]); 1404 navg += nz[i]; 1405 } 1406 ierr = PetscFree(nz);CHKERRQ(ierr); 1407 navg = navg/size; 1408 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1412 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1413 MatInfo info; 1414 PetscBool inodes; 1415 1416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1417 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1418 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1420 if (!inodes) { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1425 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1426 } 1427 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPrintf(viewer,"Information 
on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1434 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1437 PetscInt inodecount,inodelimit,*inodes; 1438 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1439 if (inodes) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1441 } else { 1442 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1446 PetscFunctionReturn(0); 1447 } 1448 } else if (isbinary) { 1449 if (size == 1) { 1450 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1451 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } else if (iascii && size == 1) { 1457 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1458 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } else if (isdraw) { 1461 PetscDraw draw; 1462 PetscBool isnull; 1463 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1464 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1465 if (isnull) PetscFunctionReturn(0); 1466 } 1467 1468 { /* assemble the entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = NULL; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr 
= VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = 
gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = 
PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 
info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt 
**idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = NULL; pvB = NULL;} 1874 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; 
i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = NULL; 1912 if (v) *v = NULL; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm 
*/ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat 
B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 
2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2266 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 
PetscFunctionReturn(0); 2280 } 2281 2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2283 { 2284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2289 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,*idxb = NULL,m = A->rmap->n; 2309 PetscScalar *va,*vv; 2310 Vec vB,vA; 2311 const PetscScalar *vb; 2312 2313 PetscFunctionBegin; 2314 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 2315 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 2316 2317 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 2318 if (idx) { 2319 for (i=0; i<m; i++) { 2320 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2321 } 2322 } 2323 2324 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 2325 if (idx) { 2326 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 2327 } 2328 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 2329 2330 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 2331 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 2332 for (i=0; i<m; i++) { 2333 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2334 vv[i] = vb[i]; 2335 if (idx) idx[i] = a->garray[idxb[i]]; 2336 } else { 2337 vv[i] = va[i]; 2338 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idx[i] > a->garray[idxb[i]]) 2339 idx[i] = a->garray[idxb[i]]; 2340 } 2341 } 2342 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 2343 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 2344 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 
2345 ierr = PetscFree(idxb);CHKERRQ(ierr); 2346 ierr = VecDestroy(&vA);CHKERRQ(ierr); 2347 ierr = VecDestroy(&vB);CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2352 { 2353 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2354 PetscInt m = A->rmap->n,n = A->cmap->n; 2355 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2356 PetscInt *cmap = mat->garray; 2357 PetscInt *diagIdx, *offdiagIdx; 2358 Vec diagV, offdiagV; 2359 PetscScalar *a, *diagA, *offdiagA, *ba; 2360 PetscInt r,j,col,ncols,*bi,*bj; 2361 PetscErrorCode ierr; 2362 Mat B = mat->B; 2363 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2364 2365 PetscFunctionBegin; 2366 /* When a process holds entire A and other processes have no entry */ 2367 if (A->cmap->N == n) { 2368 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2369 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2370 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr); 2371 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2372 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2373 PetscFunctionReturn(0); 2374 } else if (n == 0) { 2375 if (m) { 2376 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2377 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2378 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2384 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2385 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2386 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2387 2388 /* Get offdiagIdx[] for implicit 0.0 */ 2389 ba = b->a; 2390 bi = b->i; 2391 bj = b->j; 2392 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2393 for (r = 0; r < m; r++) { 2394 ncols = bi[r+1] - bi[r]; 2395 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2396 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2397 
} else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2398 offdiagA[r] = 0.0; 2399 2400 /* Find first hole in the cmap */ 2401 for (j=0; j<ncols; j++) { 2402 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2403 if (col > j && j < cstart) { 2404 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2405 break; 2406 } else if (col > j + n && j >= cstart) { 2407 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2408 break; 2409 } 2410 } 2411 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2412 /* a hole is outside compressed Bcols */ 2413 if (ncols == 0) { 2414 if (cstart) { 2415 offdiagIdx[r] = 0; 2416 } else offdiagIdx[r] = cend; 2417 } else { /* ncols > 0 */ 2418 offdiagIdx[r] = cmap[ncols-1] + 1; 2419 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2420 } 2421 } 2422 } 2423 2424 for (j=0; j<ncols; j++) { 2425 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2426 ba++; bj++; 2427 } 2428 } 2429 2430 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2431 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2432 for (r = 0; r < m; ++r) { 2433 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2434 a[r] = diagA[r]; 2435 if (idx) idx[r] = cstart + diagIdx[r]; 2436 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2437 a[r] = diagA[r]; 2438 if (idx) { 2439 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2440 idx[r] = cstart + diagIdx[r]; 2441 } else idx[r] = offdiagIdx[r]; 2442 } 2443 } else { 2444 a[r] = offdiagA[r]; 2445 if (idx) idx[r] = offdiagIdx[r]; 2446 } 2447 } 2448 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2449 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2450 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2452 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2453 ierr = PetscFree2(diagIdx, 
offdiagIdx);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2458 { 2459 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2460 PetscInt m = A->rmap->n,n = A->cmap->n; 2461 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2462 PetscInt *cmap = mat->garray; 2463 PetscInt *diagIdx, *offdiagIdx; 2464 Vec diagV, offdiagV; 2465 PetscScalar *a, *diagA, *offdiagA, *ba; 2466 PetscInt r,j,col,ncols,*bi,*bj; 2467 PetscErrorCode ierr; 2468 Mat B = mat->B; 2469 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2470 2471 PetscFunctionBegin; 2472 /* When a process holds entire A and other processes have no entry */ 2473 if (A->cmap->N == n) { 2474 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2475 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2476 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr); 2477 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2478 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } else if (n == 0) { 2481 if (m) { 2482 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2483 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2484 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2485 } 2486 PetscFunctionReturn(0); 2487 } 2488 2489 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2490 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2491 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2492 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2493 2494 /* Get offdiagIdx[] for implicit 0.0 */ 2495 ba = b->a; 2496 bi = b->i; 2497 bj = b->j; 2498 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2499 for (r = 0; r < m; r++) { 2500 ncols = bi[r+1] - bi[r]; 2501 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2502 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2503 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2504 offdiagA[r] = 0.0; 2505 
2506 /* Find first hole in the cmap */ 2507 for (j=0; j<ncols; j++) { 2508 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2509 if (col > j && j < cstart) { 2510 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2511 break; 2512 } else if (col > j + n && j >= cstart) { 2513 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2514 break; 2515 } 2516 } 2517 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2518 /* a hole is outside compressed Bcols */ 2519 if (ncols == 0) { 2520 if (cstart) { 2521 offdiagIdx[r] = 0; 2522 } else offdiagIdx[r] = cend; 2523 } else { /* ncols > 0 */ 2524 offdiagIdx[r] = cmap[ncols-1] + 1; 2525 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2526 } 2527 } 2528 } 2529 2530 for (j=0; j<ncols; j++) { 2531 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2532 ba++; bj++; 2533 } 2534 } 2535 2536 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2537 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2538 for (r = 0; r < m; ++r) { 2539 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2540 a[r] = diagA[r]; 2541 if (idx) idx[r] = cstart + diagIdx[r]; 2542 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2543 a[r] = diagA[r]; 2544 if (idx) { 2545 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2546 idx[r] = cstart + diagIdx[r]; 2547 } else idx[r] = offdiagIdx[r]; 2548 } 2549 } else { 2550 a[r] = offdiagA[r]; 2551 if (idx) idx[r] = offdiagIdx[r]; 2552 } 2553 } 2554 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2555 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2556 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2557 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2558 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2559 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 PetscErrorCode 
MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2564 { 2565 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2566 PetscInt m = A->rmap->n,n = A->cmap->n; 2567 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2568 PetscInt *cmap = mat->garray; 2569 PetscInt *diagIdx, *offdiagIdx; 2570 Vec diagV, offdiagV; 2571 PetscScalar *a, *diagA, *offdiagA, *ba; 2572 PetscInt r,j,col,ncols,*bi,*bj; 2573 PetscErrorCode ierr; 2574 Mat B = mat->B; 2575 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2576 2577 PetscFunctionBegin; 2578 /* When a process holds entire A and other processes have no entry */ 2579 if (A->cmap->N == n) { 2580 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2581 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2582 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2583 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2584 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } else if (n == 0) { 2587 if (m) { 2588 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2589 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2590 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2591 } 2592 PetscFunctionReturn(0); 2593 } 2594 2595 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2596 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2597 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2598 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2599 2600 /* Get offdiagIdx[] for implicit 0.0 */ 2601 ba = b->a; 2602 bi = b->i; 2603 bj = b->j; 2604 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2605 for (r = 0; r < m; r++) { 2606 ncols = bi[r+1] - bi[r]; 2607 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2608 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2609 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2610 offdiagA[r] = 0.0; 2611 2612 /* Find first hole in the cmap */ 2613 for (j=0; j<ncols; j++) { 2614 col = 
cmap[bj[j]]; /* global column number = cmap[B column number] */ 2615 if (col > j && j < cstart) { 2616 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2617 break; 2618 } else if (col > j + n && j >= cstart) { 2619 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2620 break; 2621 } 2622 } 2623 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2624 /* a hole is outside compressed Bcols */ 2625 if (ncols == 0) { 2626 if (cstart) { 2627 offdiagIdx[r] = 0; 2628 } else offdiagIdx[r] = cend; 2629 } else { /* ncols > 0 */ 2630 offdiagIdx[r] = cmap[ncols-1] + 1; 2631 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2632 } 2633 } 2634 } 2635 2636 for (j=0; j<ncols; j++) { 2637 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2638 ba++; bj++; 2639 } 2640 } 2641 2642 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2643 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2644 for (r = 0; r < m; ++r) { 2645 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2646 a[r] = diagA[r]; 2647 if (idx) idx[r] = cstart + diagIdx[r]; 2648 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2649 a[r] = diagA[r]; 2650 if (idx) { 2651 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2652 idx[r] = cstart + diagIdx[r]; 2653 } else idx[r] = offdiagIdx[r]; 2654 } 2655 } else { 2656 a[r] = offdiagA[r]; 2657 if (idx) idx[r] = offdiagIdx[r]; 2658 } 2659 } 2660 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2661 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2662 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2663 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2664 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2665 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2666 PetscFunctionReturn(0); 2667 } 2668 2669 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2670 { 2671 PetscErrorCode ierr; 2672 Mat *dummy; 
2673 2674 PetscFunctionBegin; 2675 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2676 *newmat = *dummy; 2677 ierr = PetscFree(dummy);CHKERRQ(ierr); 2678 PetscFunctionReturn(0); 2679 } 2680 2681 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2682 { 2683 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2684 PetscErrorCode ierr; 2685 2686 PetscFunctionBegin; 2687 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2688 A->factorerrortype = a->A->factorerrortype; 2689 PetscFunctionReturn(0); 2690 } 2691 2692 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2693 { 2694 PetscErrorCode ierr; 2695 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2696 2697 PetscFunctionBegin; 2698 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2699 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2700 if (x->assembled) { 2701 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2702 } else { 2703 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2704 } 2705 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 PetscFunctionReturn(0); 2708 } 2709 2710 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2711 { 2712 PetscFunctionBegin; 2713 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2714 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2715 PetscFunctionReturn(0); 2716 } 2717 2718 /*@ 2719 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2720 2721 Collective on Mat 2722 2723 Input Parameters: 2724 + A - the matrix 2725 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable 
algorithm) 2726 2727 Level: advanced 2728 2729 @*/ 2730 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2731 { 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2740 { 2741 PetscErrorCode ierr; 2742 PetscBool sc = PETSC_FALSE,flg; 2743 2744 PetscFunctionBegin; 2745 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2746 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2747 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2748 if (flg) { 2749 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2750 } 2751 ierr = PetscOptionsTail();CHKERRQ(ierr); 2752 PetscFunctionReturn(0); 2753 } 2754 2755 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2756 { 2757 PetscErrorCode ierr; 2758 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2759 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2760 2761 PetscFunctionBegin; 2762 if (!Y->preallocated) { 2763 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2764 } else if (!aij->nz) { 2765 PetscInt nonew = aij->nonew; 2766 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2767 aij->nonew = nonew; 2768 } 2769 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 2773 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2774 { 2775 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2776 PetscErrorCode ierr; 2777 2778 PetscFunctionBegin; 2779 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2780 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2781 if (d) { 2782 
/* Forwards the variable-block-diagonal inversion to the local diagonal block a->A;
   nblocks, bsizes and diag are passed through unchanged */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/*
   Operations table for this matrix type.

   This is a positional initializer of struct _MatOps: the order of the entries is significant and
   must not be changed. The numeric markers give the slot index of the entry that follows them;
   a NULL slot means this table supplies no function for that operation.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};
2958 PetscFunctionBegin; 2959 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2960 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2965 { 2966 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2967 PetscErrorCode ierr; 2968 2969 PetscFunctionBegin; 2970 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2971 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2972 PetscFunctionReturn(0); 2973 } 2974 2975 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2976 { 2977 Mat_MPIAIJ *b; 2978 PetscErrorCode ierr; 2979 PetscMPIInt size; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2983 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2984 b = (Mat_MPIAIJ*)B->data; 2985 2986 #if defined(PETSC_USE_CTABLE) 2987 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2988 #else 2989 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2990 #endif 2991 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2992 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2993 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2994 2995 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2996 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2997 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2998 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2999 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 3000 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3001 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3002 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3003 3004 if (!B->preallocated) { 3005 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3006 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3007 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3008 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3009 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3010 } 3011 3012 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3013 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3014 B->preallocated = PETSC_TRUE; 3015 B->was_assembled = PETSC_FALSE; 3016 B->assembled = PETSC_FALSE; 3017 PetscFunctionReturn(0); 3018 } 3019 3020 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 3021 { 3022 Mat_MPIAIJ *b; 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3027 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3028 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3029 b = (Mat_MPIAIJ*)B->data; 3030 3031 #if defined(PETSC_USE_CTABLE) 3032 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3033 #else 3034 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3035 #endif 3036 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3037 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3038 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3039 3040 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 3041 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 3042 B->preallocated = PETSC_TRUE; 3043 B->was_assembled = PETSC_FALSE; 3044 B->assembled = PETSC_FALSE; 3045 PetscFunctionReturn(0); 3046 } 3047 3048 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3049 { 3050 Mat mat; 3051 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3052 PetscErrorCode 
ierr; 3053 3054 PetscFunctionBegin; 3055 *newmat = NULL; 3056 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3057 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3058 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3059 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3060 a = (Mat_MPIAIJ*)mat->data; 3061 3062 mat->factortype = matin->factortype; 3063 mat->assembled = matin->assembled; 3064 mat->insertmode = NOT_SET_VALUES; 3065 mat->preallocated = matin->preallocated; 3066 3067 a->size = oldmat->size; 3068 a->rank = oldmat->rank; 3069 a->donotstash = oldmat->donotstash; 3070 a->roworiented = oldmat->roworiented; 3071 a->rowindices = NULL; 3072 a->rowvalues = NULL; 3073 a->getrowactive = PETSC_FALSE; 3074 3075 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3076 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3077 3078 if (oldmat->colmap) { 3079 #if defined(PETSC_USE_CTABLE) 3080 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3081 #else 3082 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 3083 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3084 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 3085 #endif 3086 } else a->colmap = NULL; 3087 if (oldmat->garray) { 3088 PetscInt len; 3089 len = oldmat->B->cmap->n; 3090 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 3091 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3092 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 3093 } else a->garray = NULL; 3094 3095 /* It may happen MatDuplicate is called with a non-assembled matrix 3096 In fact, MatDuplicate only requires the matrix to be preallocated 3097 This may happen inside a DMCreateMatrix_Shell */ 3098 if (oldmat->lvec) { 3099 ierr = 
VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3100 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3101 } 3102 if (oldmat->Mvctx) { 3103 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3104 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3105 } 3106 if (oldmat->Mvctx_mpi1) { 3107 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 3108 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 3109 } 3110 3111 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3112 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3113 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3114 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3115 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3116 *newmat = mat; 3117 PetscFunctionReturn(0); 3118 } 3119 3120 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3121 { 3122 PetscBool isbinary, ishdf5; 3123 PetscErrorCode ierr; 3124 3125 PetscFunctionBegin; 3126 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3127 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3128 /* force binary viewer to load .info file if it has not yet done so */ 3129 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3130 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3131 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 3132 if (isbinary) { 3133 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 3134 } else if (ishdf5) { 3135 #if defined(PETSC_HAVE_HDF5) 3136 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 3137 #else 3138 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3139 #endif 3140 } 
else { 3141 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3142 } 3143 PetscFunctionReturn(0); 3144 } 3145 3146 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3147 { 3148 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3149 PetscInt *rowidxs,*colidxs; 3150 PetscScalar *matvals; 3151 PetscErrorCode ierr; 3152 3153 PetscFunctionBegin; 3154 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3155 3156 /* read in matrix header */ 3157 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3158 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3159 M = header[1]; N = header[2]; nz = header[3]; 3160 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3161 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3162 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3163 3164 /* set block sizes from the viewer's .info file */ 3165 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3166 /* set global sizes if not set already */ 3167 if (mat->rmap->N < 0) mat->rmap->N = M; 3168 if (mat->cmap->N < 0) mat->cmap->N = N; 3169 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3170 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3171 3172 /* check if the matrix sizes are correct */ 3173 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3174 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3175 3176 /* read in row lengths and build row indices */ 3177 ierr = 
MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3178 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3179 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3180 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3181 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3182 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3183 /* read in column indices and matrix values */ 3184 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3185 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3186 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3187 /* store matrix indices and values */ 3188 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3189 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3190 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3191 PetscFunctionReturn(0); 3192 } 3193 3194 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3195 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3196 { 3197 PetscErrorCode ierr; 3198 IS iscol_local; 3199 PetscBool isstride; 3200 PetscMPIInt lisstride=0,gisstride; 3201 3202 PetscFunctionBegin; 3203 /* check if we are grabbing all columns*/ 3204 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3205 3206 if (isstride) { 3207 PetscInt start,len,mstart,mlen; 3208 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3209 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3210 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3211 if (mstart == start && mlen-mstart == len) lisstride = 1; 3212 } 3213 3214 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3215 if (gisstride) { 3216 PetscInt N; 3217 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3218 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3219 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3220 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3221 } else { 3222 PetscInt cbs; 3223 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3224 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3225 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3226 } 3227 3228 *isseq = iscol_local; 3229 PetscFunctionReturn(0); 3230 } 3231 3232 /* 3233 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3234 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3235 3236 Input Parameters: 3237 mat - matrix 3238 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3239 i.e., mat->rstart <= isrow[i] < mat->rend 3240 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3241 i.e., mat->cstart <= iscol[i] < mat->cend 3242 Output Parameter: 3243 isrow_d,iscol_d - sequential row and column 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol

 Notes:
   The selected columns are communicated by storing integer indices in PetscScalar vectors and
   pushing them through the matrix's own scatter (a->Mvctx), so only the ghost columns already
   known to this process are touched. The array returned in garray is allocated here and is not
   freed by this routine.
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  PetscErrorCode ierr;
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
  ierr = VecSet(x,-1.0);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
  ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);

  /* Get start indices */
  /* inclusive prefix sum of the local sizes, turned into this process's starting offset in iscol */
  ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  isstart -= ncols;
  ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);

  ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
  for (i=0; i<ncols; i++) {
    /* selected columns get their own global index in x; unselected ones keep the -1 padding */
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);

  /* Get iscol_d */
  /* PETSC_OWN_POINTER: the index set takes over idx, so idx is not freed here */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
  /* i is reused as scratch for the block size */
  ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);

  /* Get isrow_d */
  ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
  rstart = mat->rmap->rstart;
  /* fresh allocation; the previous idx now belongs to *iscol_d */
  ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);

  ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
  ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
  ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  /* note that this overwrites the contents of the matrix's own work vector a->lvec */
  ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);

  ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  ierr  = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
  ierr  = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);

  ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
  for (i=0; i<Bn; i++) {
    /* anything above the -1 padding is a column selected by its owning process */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]     = i;                   /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
      count++;
    }
  }
  ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
  ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);

  /* PETSC_COPY_VALUES: the index set copies idx, so idx is freed below */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
  /* cannot ensure iscol_o has same blocksize as iscol! */

  ierr = PetscFree(idx);CHKERRQ(ierr);
  /* cmap1 is handed to the caller through garray and is not freed here */
  *garray = cmap1;

  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&cmap);CHKERRQ(ierr);
  ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3334 /* cannot ensure iscol_o has same blocksize as iscol! */ 3335 3336 ierr = PetscFree(idx);CHKERRQ(ierr); 3337 *garray = cmap1; 3338 3339 ierr = VecDestroy(&x);CHKERRQ(ierr); 3340 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3341 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3342 PetscFunctionReturn(0); 3343 } 3344 3345 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3346 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3347 { 3348 PetscErrorCode ierr; 3349 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3350 Mat M = NULL; 3351 MPI_Comm comm; 3352 IS iscol_d,isrow_d,iscol_o; 3353 Mat Asub = NULL,Bsub = NULL; 3354 PetscInt n; 3355 3356 PetscFunctionBegin; 3357 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3358 3359 if (call == MAT_REUSE_MATRIX) { 3360 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3361 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3362 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3363 3364 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3365 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3366 3367 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3368 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3369 3370 /* Update diagonal and off-diagonal portions of submat */ 3371 asub = (Mat_MPIAIJ*)(*submat)->data; 3372 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3373 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3374 if (n) { 
3375 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3376 } 3377 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3378 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3379 3380 } else { /* call == MAT_INITIAL_MATRIX) */ 3381 const PetscInt *garray; 3382 PetscInt BsubN; 3383 3384 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3385 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3386 3387 /* Create local submatrices Asub and Bsub */ 3388 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3389 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3390 3391 /* Create submatrix M */ 3392 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3393 3394 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3395 asub = (Mat_MPIAIJ*)M->data; 3396 3397 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3398 n = asub->B->cmap->N; 3399 if (BsubN > n) { 3400 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3401 const PetscInt *idx; 3402 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3403 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3404 3405 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3406 j = 0; 3407 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3408 for (i=0; i<n; i++) { 3409 if (j >= BsubN) break; 3410 while (subgarray[i] > garray[j]) j++; 3411 3412 if (subgarray[i] == garray[j]) { 3413 idx_new[i] = idx[j++]; 3414 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3415 } 3416 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3417 3418 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3419 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3420 3421 } else if (BsubN < n) { 3422 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3423 } 3424 3425 ierr = PetscFree(garray);CHKERRQ(ierr); 3426 *submat = M; 3427 3428 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3429 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3430 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3431 3432 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3433 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3434 3435 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3436 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3437 } 3438 PetscFunctionReturn(0); 3439 } 3440 3441 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3442 { 3443 PetscErrorCode ierr; 3444 IS iscol_local=NULL,isrow_d; 3445 PetscInt csize; 3446 PetscInt n,i,j,start,end; 3447 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3448 MPI_Comm comm; 3449 3450 PetscFunctionBegin; 3451 /* If isrow has same processor distribution as mat, 3452 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3453 if (call == MAT_REUSE_MATRIX) { 3454 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3455 if (isrow_d) { 3456 sameRowDist = PETSC_TRUE; 3457 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3458 } else { 3459 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3460 if (iscol_local) { 3461 sameRowDist = PETSC_TRUE; 3462 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3463 } 3464 } 3465 } else { 3466 /* Check if isrow has same processor distribution as mat */ 3467 
sameDist[0] = PETSC_FALSE; 3468 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3469 if (!n) { 3470 sameDist[0] = PETSC_TRUE; 3471 } else { 3472 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3473 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3474 if (i >= start && j < end) { 3475 sameDist[0] = PETSC_TRUE; 3476 } 3477 } 3478 3479 /* Check if iscol has same processor distribution as mat */ 3480 sameDist[1] = PETSC_FALSE; 3481 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3482 if (!n) { 3483 sameDist[1] = PETSC_TRUE; 3484 } else { 3485 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3486 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3487 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3488 } 3489 3490 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3491 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3492 sameRowDist = tsameDist[0]; 3493 } 3494 3495 if (sameRowDist) { 3496 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3497 /* isrow and iscol have same processor distribution as mat */ 3498 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3499 PetscFunctionReturn(0); 3500 } else { /* sameRowDist */ 3501 /* isrow has same processor distribution as mat */ 3502 if (call == MAT_INITIAL_MATRIX) { 3503 PetscBool sorted; 3504 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3505 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3506 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3507 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3508 3509 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3510 if (sorted) { 3511 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3512 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3513 PetscFunctionReturn(0); 3514 } 3515 } else { /* call == MAT_REUSE_MATRIX */ 3516 IS iscol_sub; 3517 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3518 if (iscol_sub) { 3519 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 } 3523 } 3524 } 3525 3526 /* General case: iscol -> iscol_local which has global size of iscol */ 3527 if (call == MAT_REUSE_MATRIX) { 3528 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3529 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3530 } else { 3531 if (!iscol_local) { 3532 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3533 } 3534 } 3535 3536 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3537 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3538 3539 if (call == MAT_INITIAL_MATRIX) { 3540 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3541 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3542 } 3543 PetscFunctionReturn(0); 3544 } 3545 3546 /*@C 3547 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3548 and "off-diagonal" part of the matrix in CSR format. 3549 3550 Collective 3551 3552 Input Parameters: 3553 + comm - MPI communicator 3554 . A - "diagonal" portion of matrix 3555 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3556 - garray - global index of B columns 3557 3558 Output Parameter: 3559 . 
mat - the matrix, with input A as its local diagonal matrix 3560 Level: advanced 3561 3562 Notes: 3563 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3564 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3565 3566 .seealso: MatCreateMPIAIJWithSplitArrays() 3567 @*/ 3568 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3569 { 3570 PetscErrorCode ierr; 3571 Mat_MPIAIJ *maij; 3572 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3573 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3574 PetscScalar *oa=b->a; 3575 Mat Bnew; 3576 PetscInt m,n,N; 3577 3578 PetscFunctionBegin; 3579 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3580 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3581 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3582 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3583 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3584 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3585 3586 /* Get global columns of mat */ 3587 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3588 3589 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3590 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3591 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3592 maij = (Mat_MPIAIJ*)(*mat)->data; 3593 3594 (*mat)->preallocated = PETSC_TRUE; 3595 3596 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3597 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3598 3599 /* Set A as diagonal portion of *mat */ 3600 maij->A = A; 3601 3602 nz = oi[m]; 3603 for (i=0; i<nz; i++) { 3604 col = oj[i]; 3605 oj[i] 
= garray[col]; 3606 } 3607 3608 /* Set Bnew as off-diagonal portion of *mat */ 3609 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3610 bnew = (Mat_SeqAIJ*)Bnew->data; 3611 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3612 maij->B = Bnew; 3613 3614 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3615 3616 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3617 b->free_a = PETSC_FALSE; 3618 b->free_ij = PETSC_FALSE; 3619 ierr = MatDestroy(&B);CHKERRQ(ierr); 3620 3621 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3622 bnew->free_a = PETSC_TRUE; 3623 bnew->free_ij = PETSC_TRUE; 3624 3625 /* condense columns of maij->B */ 3626 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3627 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3628 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3629 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3630 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3631 PetscFunctionReturn(0); 3632 } 3633 3634 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3635 3636 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3637 { 3638 PetscErrorCode ierr; 3639 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3640 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3641 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3642 Mat M,Msub,B=a->B; 3643 MatScalar *aa; 3644 Mat_SeqAIJ *aij; 3645 PetscInt *garray = a->garray,*colsub,Ncols; 3646 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3647 IS iscol_sub,iscmap; 3648 const PetscInt *is_idx,*cmap; 3649 PetscBool allcolumns=PETSC_FALSE; 3650 MPI_Comm comm; 3651 3652 
PetscFunctionBegin; 3653 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3654 3655 if (call == MAT_REUSE_MATRIX) { 3656 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3657 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3658 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3659 3660 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3661 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3662 3663 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3664 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3665 3666 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3667 3668 } else { /* call == MAT_INITIAL_MATRIX) */ 3669 PetscBool flg; 3670 3671 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3672 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3673 3674 /* (1) iscol -> nonscalable iscol_local */ 3675 /* Check for special case: each processor gets entire matrix columns */ 3676 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3677 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3678 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3679 if (allcolumns) { 3680 iscol_sub = iscol_local; 3681 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3682 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3683 3684 } else { 3685 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3686 PetscInt *idx,*cmap1,k; 3687 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3688 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3689 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3690 count = 0; 3691 k = 0; 3692 for (i=0; i<Ncols; i++) { 3693 j = is_idx[i]; 3694 if (j >= cstart && j < cend) { 3695 /* diagonal part of mat */ 3696 idx[count] = j; 3697 cmap1[count++] = i; /* column index in submat */ 3698 } else if (Bn) { 3699 /* off-diagonal part of mat */ 3700 if (j == garray[k]) { 3701 idx[count] = j; 3702 cmap1[count++] = i; /* column index in submat */ 3703 } else if (j > garray[k]) { 3704 while (j > garray[k] && k < Bn-1) k++; 3705 if (j == garray[k]) { 3706 idx[count] = j; 3707 cmap1[count++] = i; /* column index in submat */ 3708 } 3709 } 3710 } 3711 } 3712 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3713 3714 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3715 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3716 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3717 3718 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3719 } 3720 3721 /* (3) Create sequential Msub */ 3722 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3723 } 3724 3725 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3726 aij = (Mat_SeqAIJ*)(Msub)->data; 3727 ii = aij->i; 3728 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3729 3730 /* 3731 m - number of local rows 3732 Ncols - number of columns (same on all processors) 3733 rstart - first row in new global matrix generated 3734 */ 3735 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3736 3737 if (call == MAT_INITIAL_MATRIX) { 3738 /* (4) Create parallel newmat */ 3739 PetscMPIInt rank,size; 3740 PetscInt csize; 3741 3742 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3743 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3744 3745 /* 3746 Determine the number of non-zeros in the diagonal and off-diagonal 3747 portions of the matrix in order to do correct preallocation 3748 */ 3749 3750 /* first get start and end of "diagonal" columns */ 3751 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3752 if (csize == PETSC_DECIDE) { 3753 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3754 if (mglobal == Ncols) { /* square matrix */ 3755 nlocal = m; 3756 } else { 3757 nlocal = Ncols/size + ((Ncols % size) > rank); 3758 } 3759 } else { 3760 nlocal = csize; 3761 } 3762 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3763 rstart = rend - nlocal; 3764 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3765 3766 /* next, compute all the lengths */ 3767 jj = aij->j; 3768 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3769 olens = dlens + m; 3770 for (i=0; i<m; i++) { 3771 jend = ii[i+1] - ii[i]; 3772 olen = 0; 3773 dlen = 0; 3774 for (j=0; j<jend; j++) { 3775 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3776 else dlen++; 3777 jj++; 3778 } 3779 olens[i] = olen; 3780 dlens[i] = dlen; 3781 } 3782 3783 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3784 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3785 3786 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3787 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3788 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3789 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3790 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3791 ierr = PetscFree(dlens);CHKERRQ(ierr); 3792 3793 } else { /* call == MAT_REUSE_MATRIX */ 3794 M = *newmat; 3795 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3796 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3797 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3798 /* 3799 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3800 rather than the slower MatSetValues(). 3801 */ 3802 M->was_assembled = PETSC_TRUE; 3803 M->assembled = PETSC_FALSE; 3804 } 3805 3806 /* (5) Set values of Msub to *newmat */ 3807 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3808 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3809 3810 jj = aij->j; 3811 aa = aij->a; 3812 for (i=0; i<m; i++) { 3813 row = rstart + i; 3814 nz = ii[i+1] - ii[i]; 3815 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3816 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3817 jj += nz; aa += nz; 3818 } 3819 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3820 3821 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 3824 ierr = PetscFree(colsub);CHKERRQ(ierr); 3825 3826 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3827 if (call == MAT_INITIAL_MATRIX) { 3828 *newmat = M; 3829 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3830 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3831 3832 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3833 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3834 3835 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3836 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3837 3838 if (iscol_local) { 3839 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3840 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3841 } 3842 } 3843 PetscFunctionReturn(0); 3844 } 3845 3846 /* 3847 Not great since it makes two copies of the submatrix, first an SeqAIJ 3848 in local and then by concatenating the local matrices the end result. 
3849 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3850 3851 Note: This requires a sequential iscol with all indices. 3852 */ 3853 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3854 { 3855 PetscErrorCode ierr; 3856 PetscMPIInt rank,size; 3857 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3858 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3859 Mat M,Mreuse; 3860 MatScalar *aa,*vwork; 3861 MPI_Comm comm; 3862 Mat_SeqAIJ *aij; 3863 PetscBool colflag,allcolumns=PETSC_FALSE; 3864 3865 PetscFunctionBegin; 3866 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3867 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3868 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3869 3870 /* Check for special case: each processor gets entire matrix columns */ 3871 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3872 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3873 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3874 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3875 3876 if (call == MAT_REUSE_MATRIX) { 3877 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3878 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3879 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3880 } else { 3881 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3882 } 3883 3884 /* 3885 m - number of local rows 3886 n - number of columns (same on all processors) 3887 rstart - first row in new global matrix generated 3888 */ 3889 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3890 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3891 if 
(call == MAT_INITIAL_MATRIX) { 3892 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3893 ii = aij->i; 3894 jj = aij->j; 3895 3896 /* 3897 Determine the number of non-zeros in the diagonal and off-diagonal 3898 portions of the matrix in order to do correct preallocation 3899 */ 3900 3901 /* first get start and end of "diagonal" columns */ 3902 if (csize == PETSC_DECIDE) { 3903 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3904 if (mglobal == n) { /* square matrix */ 3905 nlocal = m; 3906 } else { 3907 nlocal = n/size + ((n % size) > rank); 3908 } 3909 } else { 3910 nlocal = csize; 3911 } 3912 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3913 rstart = rend - nlocal; 3914 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3915 3916 /* next, compute all the lengths */ 3917 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3918 olens = dlens + m; 3919 for (i=0; i<m; i++) { 3920 jend = ii[i+1] - ii[i]; 3921 olen = 0; 3922 dlen = 0; 3923 for (j=0; j<jend; j++) { 3924 if (*jj < rstart || *jj >= rend) olen++; 3925 else dlen++; 3926 jj++; 3927 } 3928 olens[i] = olen; 3929 dlens[i] = dlen; 3930 } 3931 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3932 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3933 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3934 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3935 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3936 ierr = PetscFree(dlens);CHKERRQ(ierr); 3937 } else { 3938 PetscInt ml,nl; 3939 3940 M = *newmat; 3941 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3942 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3943 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3944 /* 3945 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3946 rather than the slower MatSetValues(). 
3947 */ 3948 M->was_assembled = PETSC_TRUE; 3949 M->assembled = PETSC_FALSE; 3950 } 3951 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3952 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3953 ii = aij->i; 3954 jj = aij->j; 3955 aa = aij->a; 3956 for (i=0; i<m; i++) { 3957 row = rstart + i; 3958 nz = ii[i+1] - ii[i]; 3959 cwork = jj; jj += nz; 3960 vwork = aa; aa += nz; 3961 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3962 } 3963 3964 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3965 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3966 *newmat = M; 3967 3968 /* save submatrix used in processor for next request */ 3969 if (call == MAT_INITIAL_MATRIX) { 3970 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3971 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3972 } 3973 PetscFunctionReturn(0); 3974 } 3975 3976 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3977 { 3978 PetscInt m,cstart, cend,j,nnz,i,d; 3979 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3980 const PetscInt *JJ; 3981 PetscErrorCode ierr; 3982 PetscBool nooffprocentries; 3983 3984 PetscFunctionBegin; 3985 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3986 3987 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3988 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3989 m = B->rmap->n; 3990 cstart = B->cmap->rstart; 3991 cend = B->cmap->rend; 3992 rstart = B->rmap->rstart; 3993 3994 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3995 3996 if (PetscDefined(USE_DEBUG)) { 3997 for (i=0; i<m; i++) { 3998 nnz = Ii[i+1]- Ii[i]; 3999 JJ = J + Ii[i]; 4000 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 4001 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column 
index",i,JJ[0]); 4002 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 4003 } 4004 } 4005 4006 for (i=0; i<m; i++) { 4007 nnz = Ii[i+1]- Ii[i]; 4008 JJ = J + Ii[i]; 4009 nnz_max = PetscMax(nnz_max,nnz); 4010 d = 0; 4011 for (j=0; j<nnz; j++) { 4012 if (cstart <= JJ[j] && JJ[j] < cend) d++; 4013 } 4014 d_nnz[i] = d; 4015 o_nnz[i] = nnz - d; 4016 } 4017 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 4018 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 4019 4020 for (i=0; i<m; i++) { 4021 ii = i + rstart; 4022 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 4023 } 4024 nooffprocentries = B->nooffprocentries; 4025 B->nooffprocentries = PETSC_TRUE; 4026 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4027 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4028 B->nooffprocentries = nooffprocentries; 4029 4030 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4031 PetscFunctionReturn(0); 4032 } 4033 4034 /*@ 4035 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4036 (the default parallel PETSc format). 4037 4038 Collective 4039 4040 Input Parameters: 4041 + B - the matrix 4042 . i - the indices into j for the start of each local row (starts with zero) 4043 . j - the column indices for each local row (starts with zero) 4044 - v - optional values in the matrix 4045 4046 Level: developer 4047 4048 Notes: 4049 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4050 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4051 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the method composed with B under the name "MatMPIAIJSetPreallocationCSR_C" */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.
In the 4130 common case of a square matrix, the row and column ranges are the same and 4131 the DIAGONAL part is also square. The remaining portion of the local 4132 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4133 4134 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4135 4136 You can call MatGetInfo() to get information on how effective the preallocation was; 4137 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4138 You can also run with the option -info and look for messages with the string 4139 malloc in them to see if additional memory allocation was needed. 4140 4141 Example usage: 4142 4143 Consider the following 8x8 matrix with 34 non-zero values, that is 4144 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4145 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4146 as follows: 4147 4148 .vb 4149 1 2 0 | 0 3 0 | 0 4 4150 Proc0 0 5 6 | 7 0 0 | 8 0 4151 9 0 10 | 11 0 0 | 12 0 4152 ------------------------------------- 4153 13 0 14 | 15 16 17 | 0 0 4154 Proc1 0 18 0 | 19 20 21 | 0 0 4155 0 0 0 | 22 23 0 | 24 0 4156 ------------------------------------- 4157 Proc2 25 26 27 | 0 0 28 | 29 0 4158 30 0 0 | 31 32 33 | 0 34 4159 .ve 4160 4161 This can be represented as a collection of submatrices as: 4162 4163 .vb 4164 A B C 4165 D E F 4166 G H I 4167 .ve 4168 4169 Where the submatrices A,B,C are owned by proc0, D,E,F are 4170 owned by proc1, G,H,I are owned by proc2. 4171 4172 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4173 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4174 The 'M','N' parameters are 8,8, and have the same values on all procs. 4175 4176 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4177 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4178 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4179 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4180 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4181 matrix, and [DF] as another SeqAIJ matrix. 4182 4183 When d_nz, o_nz parameters are specified, d_nz storage elements are 4184 allocated for every row of the local diagonal submatrix, and o_nz 4185 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4186 One way to choose d_nz and o_nz is to use the max nonzeros per local 4187 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4188 In this case, the values of d_nz,o_nz are: 4189 .vb 4190 proc0 : d_nz = 2, o_nz = 2 4191 proc1 : d_nz = 3, o_nz = 2 4192 proc2 : d_nz = 1, o_nz = 4 4193 .ve 4194 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4195 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4196 for proc2. i.e we are using 12+15+10=37 storage locations to store 4197 34 values. 4198 4199 When d_nnz, o_nnz parameters are specified, the storage is specified 4200 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4201 In the above case the values for d_nnz,o_nnz are: 4202 .vb 4203 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4204 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4205 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4206 .ve 4207 Here the space allocated is the sum of all the above values i.e 34, and 4208 hence pre-allocation is perfect.
4209 4210 Level: intermediate 4211 4212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4213 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4214 @*/ 4215 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4216 { 4217 PetscErrorCode ierr; 4218 4219 PetscFunctionBegin; 4220 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4221 PetscValidType(B,1); 4222 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4223 PetscFunctionReturn(0); 4224 } 4225 4226 /*@ 4227 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4228 CSR format for the local rows. 4229 4230 Collective 4231 4232 Input Parameters: 4233 + comm - MPI communicator 4234 . m - number of local rows (Cannot be PETSC_DECIDE) 4235 . n - This value should be the same as the local size used in creating the 4236 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4237 calculated if N is given) For square matrices n is almost always m. 4238 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4239 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4240 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4241 . j - column indices 4242 - a - matrix values 4243 4244 Output Parameter: 4245 . mat - the matrix 4246 4247 Level: intermediate 4248 4249 Notes: 4250 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4251 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4252 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4253 4254 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4255 4256 The format which is used for the sparse matrix input, is equivalent to a 4257 row-major ordering.. i.e for the following matrix, the input data expected is 4258 as shown 4259 4260 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4261 4262 $ 1 0 0 4263 $ 2 0 3 P0 4264 $ ------- 4265 $ 4 5 6 P1 4266 $ 4267 $ Process0 [P0]: rows_owned=[0,1] 4268 $ i = {0,1,3} [size = nrow+1 = 2+1] 4269 $ j = {0,0,2} [size = 3] 4270 $ v = {1,2,3} [size = 3] 4271 $ 4272 $ Process1 [P1]: rows_owned=[2] 4273 $ i = {0,3} [size = nrow+1 = 1+1] 4274 $ j = {0,1,2} [size = 3] 4275 $ v = {4,5,6} [size = 3] 4276 4277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4278 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4279 @*/ 4280 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4281 { 4282 PetscErrorCode ierr; 4283 4284 PetscFunctionBegin; 4285 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4286 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4287 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4288 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4289 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4290 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4291 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4292 PetscFunctionReturn(0); 4293 } 4294 4295 /*@ 4296 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4297 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical 4298 4299 Collective 4300 4301 Input Parameters: 4302 + mat - the matrix 4303 . m - number of local rows (Cannot be PETSC_DECIDE) 4304 . n - This value should be the same as the local size used in creating the 4305 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4306 calculated if N is given) For square matrices n is almost always m. 4307 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4308 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4309 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4310 . J - column indices 4311 - v - matrix values 4312 4313 Level: intermediate 4314 4315 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4316 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4317 @*/ 4318 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4319 { 4320 PetscErrorCode ierr; 4321 PetscInt cstart,nnz,i,j; 4322 PetscInt *ld; 4323 PetscBool nooffprocentries; 4324 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4325 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4326 PetscScalar *ad = Ad->a, *ao = Ao->a; 4327 const PetscInt *Adi = Ad->i; 4328 PetscInt ldi,Iii,md; 4329 4330 PetscFunctionBegin; 4331 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4332 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4333 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4334 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4335 4336 cstart = mat->cmap->rstart; 4337 if (!Aij->ld) { 4338 /* count number of entries below block diagonal */ 4339 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4340 Aij->ld = ld; 4341 for (i=0; i<m; i++) { 4342 nnz = Ii[i+1]- Ii[i]; 4343 j = 0; 4344 while (J[j] < cstart && j < nnz) {j++;} 4345 J += nnz; 4346 ld[i] = j; 4347 } 4348 } else { 4349 ld = Aij->ld; 4350 } 4351 4352 for (i=0; i<m; i++) { 4353 nnz = Ii[i+1]- Ii[i]; 4354 Iii = Ii[i]; 4355 ldi = ld[i]; 4356 md = Adi[i+1]-Adi[i]; 4357 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4358 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4359 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4360 ad += md; 4361 ao += nnz - md; 4362 } 4363 nooffprocentries = mat->nooffprocentries; 4364 mat->nooffprocentries = PETSC_TRUE; 4365 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4366 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4367 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4368 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4369 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4370 mat->nooffprocentries = nooffprocentries; 4371 PetscFunctionReturn(0); 4372 } 4373 4374 /*@C 4375 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4376 (the default parallel PETSc format). For good matrix assembly performance 4377 the user should preallocate the matrix storage by setting the parameters 4378 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4379 performance can be increased by more than a factor of 50. 4380 4381 Collective 4382 4383 Input Parameters: 4384 + comm - MPI communicator 4385 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4386 This value should be the same as the local size used in creating the 4387 y vector for the matrix-vector product y = Ax. 4388 . n - This value should be the same as the local size used in creating the 4389 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4390 calculated if N is given) For square matrices n is almost always m. 4391 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4392 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4393 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4394 (same value is used for all local rows) 4395 . d_nnz - array containing the number of nonzeros in the various rows of the 4396 DIAGONAL portion of the local submatrix (possibly different for each row) 4397 or NULL, if d_nz is used to specify the nonzero structure. 4398 The size of this array is equal to the number of local rows, i.e 'm'. 4399 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4400 submatrix (same value is used for all local rows). 4401 - o_nnz - array containing the number of nonzeros in the various rows of the 4402 OFF-DIAGONAL portion of the local submatrix (possibly different for 4403 each row) or NULL, if o_nz is used to specify the nonzero 4404 structure. The size of this array is equal to the number 4405 of local rows, i.e 'm'. 4406 4407 Output Parameter: 4408 . A - the matrix 4409 4410 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4411 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4412 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4413 4414 Notes: 4415 If the *_nnz parameter is given then the *_nz parameter is ignored 4416 4417 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4418 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4419 storage requirements for this matrix. 4420 4421 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4422 processor than it must be used on all processors that share the object for 4423 that argument. 4424 4425 The user MUST specify either the local or global matrix dimensions 4426 (possibly both). 4427 4428 The parallel matrix is partitioned across processors such that the 4429 first m0 rows belong to process 0, the next m1 rows belong to 4430 process 1, the next m2 rows belong to process 2 etc.. where 4431 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4432 values corresponding to [m x N] submatrix. 4433 4434 The columns are logically partitioned with the n0 columns belonging 4435 to 0th partition, the next n1 columns belonging to the next 4436 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4437 4438 The DIAGONAL portion of the local submatrix on any given processor 4439 is the submatrix corresponding to the rows and columns m,n 4440 corresponding to the given processor. i.e diagonal matrix on 4441 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4442 etc. The remaining portion of the local submatrix [m x (N-n)] 4443 constitute the OFF-DIAGONAL portion. The example below better 4444 illustrates this concept. 4445 4446 For a square global matrix we define each processor's diagonal portion 4447 to be its local rows and the corresponding columns (a square submatrix); 4448 each processor's off-diagonal portion encompasses the remainder of the 4449 local matrix (a rectangular submatrix). 4450 4451 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4452 4453 When calling this routine with a single process communicator, a matrix of 4454 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4455 type of communicator, use the construction mechanism 4456 .vb 4457 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4458 .ve 4459 4460 $ MatCreate(...,&A); 4461 $ MatSetType(A,MATMPIAIJ); 4462 $ MatSetSizes(A, m,n,M,N); 4463 $ MatMPIAIJSetPreallocation(A,...); 4464 4465 By default, this format uses inodes (identical nodes) when possible. 4466 We search for consecutive rows with the same nonzero structure, thereby 4467 reusing matrix information to achieve increased efficiency. 4468 4469 Options Database Keys: 4470 + -mat_no_inode - Do not use inodes 4471 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4472 4473 4474 4475 Example usage: 4476 4477 Consider the following 8x8 matrix with 34 non-zero values, that is 4478 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4479 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4480 as follows 4481 4482 .vb 4483 1 2 0 | 0 3 0 | 0 4 4484 Proc0 0 5 6 | 7 0 0 | 8 0 4485 9 0 10 | 11 0 0 | 12 0 4486 ------------------------------------- 4487 13 0 14 | 15 16 17 | 0 0 4488 Proc1 0 18 0 | 19 20 21 | 0 0 4489 0 0 0 | 22 23 0 | 24 0 4490 ------------------------------------- 4491 Proc2 25 26 27 | 0 0 28 | 29 0 4492 30 0 0 | 31 32 33 | 0 34 4493 .ve 4494 4495 This can be represented as a collection of submatrices as 4496 4497 .vb 4498 A B C 4499 D E F 4500 G H I 4501 .ve 4502 4503 Where the submatrices A,B,C are owned by proc0, D,E,F are 4504 owned by proc1, G,H,I are owned by proc2. 4505 4506 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4507 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4508 The 'M','N' parameters are 8,8, and have the same values on all procs. 
4509 4510 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4511 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4512 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4513 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4514 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4515 matrix, and [DF] as another SeqAIJ matrix. 4516 4517 When d_nz, o_nz parameters are specified, d_nz storage elements are 4518 allocated for every row of the local diagonal submatrix, and o_nz 4519 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4520 One way to choose d_nz and o_nz is to use the max nonzeros per local 4521 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4522 In this case, the values of d_nz,o_nz are 4523 .vb 4524 proc0 : d_nz = 2, o_nz = 2 4525 proc1 : d_nz = 3, o_nz = 2 4526 proc2 : d_nz = 1, o_nz = 4 4527 .ve 4528 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4529 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4530 for proc2. i.e we are using 12+15+10=37 storage locations to store 4531 34 values. 4532 4533 When d_nnz, o_nnz parameters are specified, the storage is specified 4534 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4535 In the above case the values for d_nnz,o_nnz are 4536 .vb 4537 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4538 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4539 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4540 .ve 4541 Here the space allocated is the sum of all the above values i.e 34, and 4542 hence pre-allocation is perfect.
4543 4544 Level: intermediate 4545 4546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4547 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4548 @*/ 4549 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4550 { 4551 PetscErrorCode ierr; 4552 PetscMPIInt size; 4553 4554 PetscFunctionBegin; 4555 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4556 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4557 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4558 if (size > 1) { 4559 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4560 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4561 } else { 4562 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4563 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4564 } 4565 PetscFunctionReturn(0); 4566 } 4567 4568 /*@C 4569 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4570 4571 Not collective 4572 4573 Input Parameter: 4574 . A - The MPIAIJ matrix 4575 4576 Output Parameters: 4577 + Ad - The local diagonal block as a SeqAIJ matrix 4578 . Ao - The local off-diagonal block as a SeqAIJ matrix 4579 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4580 4581 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4582 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4583 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4584 local column numbers to global column numbers in the original matrix. 
4585 4586 Level: intermediate 4587 4588 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4589 @*/ 4590 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4591 { 4592 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4593 PetscBool flg; 4594 PetscErrorCode ierr; 4595 4596 PetscFunctionBegin; 4597 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4598 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4599 if (Ad) *Ad = a->A; 4600 if (Ao) *Ao = a->B; 4601 if (colmap) *colmap = a->garray; 4602 PetscFunctionReturn(0); 4603 } 4604 4605 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4606 { 4607 PetscErrorCode ierr; 4608 PetscInt m,N,i,rstart,nnz,Ii; 4609 PetscInt *indx; 4610 PetscScalar *values; 4611 4612 PetscFunctionBegin; 4613 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4614 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4615 PetscInt *dnz,*onz,sum,bs,cbs; 4616 4617 if (n == PETSC_DECIDE) { 4618 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4619 } 4620 /* Check sum(n) = N */ 4621 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4622 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4623 4624 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4625 rstart -= m; 4626 4627 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4628 for (i=0; i<m; i++) { 4629 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4630 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4631 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4632 } 4633 4634 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4635 ierr = 
MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4636 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4637 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4638 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4639 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4640 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4641 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4642 } 4643 4644 /* numeric phase */ 4645 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4646 for (i=0; i<m; i++) { 4647 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4648 Ii = i + rstart; 4649 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4650 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4651 } 4652 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4653 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4654 PetscFunctionReturn(0); 4655 } 4656 4657 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4658 { 4659 PetscErrorCode ierr; 4660 PetscMPIInt rank; 4661 PetscInt m,N,i,rstart,nnz; 4662 size_t len; 4663 const PetscInt *indx; 4664 PetscViewer out; 4665 char *name; 4666 Mat B; 4667 const PetscScalar *values; 4668 4669 PetscFunctionBegin; 4670 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4671 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4672 /* Should this be the type of the diagonal block of A? 
*/ 4673 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4674 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4675 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4676 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4677 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4678 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4679 for (i=0; i<m; i++) { 4680 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4681 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4682 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4683 } 4684 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4685 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4686 4687 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4688 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4689 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4690 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4691 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4692 ierr = PetscFree(name);CHKERRQ(ierr); 4693 ierr = MatView(B,out);CHKERRQ(ierr); 4694 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4695 ierr = MatDestroy(&B);CHKERRQ(ierr); 4696 PetscFunctionReturn(0); 4697 } 4698 4699 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4700 { 4701 PetscErrorCode ierr; 4702 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4703 4704 PetscFunctionBegin; 4705 if (!merge) PetscFunctionReturn(0); 4706 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4707 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4708 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4709 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4710 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4711 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4712 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4713 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4714 ierr = 
PetscFree(merge->buf_rj);CHKERRQ(ierr); 4715 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4716 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4717 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4718 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4719 ierr = PetscFree(merge);CHKERRQ(ierr); 4720 PetscFunctionReturn(0); 4721 } 4722 4723 #include <../src/mat/utils/freespace.h> 4724 #include <petscbt.h> 4725 4726 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4727 { 4728 PetscErrorCode ierr; 4729 MPI_Comm comm; 4730 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4731 PetscMPIInt size,rank,taga,*len_s; 4732 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4733 PetscInt proc,m; 4734 PetscInt **buf_ri,**buf_rj; 4735 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4736 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4737 MPI_Request *s_waits,*r_waits; 4738 MPI_Status *status; 4739 MatScalar *aa=a->a; 4740 MatScalar **abuf_r,*ba_i; 4741 Mat_Merge_SeqsToMPI *merge; 4742 PetscContainer container; 4743 4744 PetscFunctionBegin; 4745 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4746 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4747 4748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4749 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4750 4751 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4752 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4753 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4754 4755 bi = merge->bi; 4756 bj = merge->bj; 4757 buf_ri = merge->buf_ri; 4758 buf_rj = merge->buf_rj; 4759 4760 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4761 owners = merge->rowmap->range; 4762 len_s = merge->len_s; 4763 4764 /* send and recv matrix values */ 4765 /*-----------------------------*/ 4766 ierr = 
PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4767 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4768 4769 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4770 for (proc=0,k=0; proc<size; proc++) { 4771 if (!len_s[proc]) continue; 4772 i = owners[proc]; 4773 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4774 k++; 4775 } 4776 4777 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4778 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4779 ierr = PetscFree(status);CHKERRQ(ierr); 4780 4781 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4782 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4783 4784 /* insert mat values of mpimat */ 4785 /*----------------------------*/ 4786 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4787 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4788 4789 for (k=0; k<merge->nrecv; k++) { 4790 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4791 nrows = *(buf_ri_k[k]); 4792 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4793 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4794 } 4795 4796 /* set values of ba */ 4797 m = merge->rowmap->n; 4798 for (i=0; i<m; i++) { 4799 arow = owners[rank] + i; 4800 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4801 bnzi = bi[i+1] - bi[i]; 4802 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4803 4804 /* add local non-zero vals of this proc's seqmat into ba */ 4805 anzi = ai[arow+1] - ai[arow]; 4806 aj = a->j + ai[arow]; 4807 aa = a->a + ai[arow]; 4808 nextaj = 0; 4809 for (j=0; nextaj<anzi; j++) { 4810 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4811 ba_i[j] += aa[nextaj++]; 4812 } 4813 } 4814 4815 /* add received vals into ba */ 4816 for (k=0; 
k<merge->nrecv; k++) { /* k-th received message */ 4817 /* i-th row */ 4818 if (i == *nextrow[k]) { 4819 anzi = *(nextai[k]+1) - *nextai[k]; 4820 aj = buf_rj[k] + *(nextai[k]); 4821 aa = abuf_r[k] + *(nextai[k]); 4822 nextaj = 0; 4823 for (j=0; nextaj<anzi; j++) { 4824 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4825 ba_i[j] += aa[nextaj++]; 4826 } 4827 } 4828 nextrow[k]++; nextai[k]++; 4829 } 4830 } 4831 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4832 } 4833 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4834 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4835 4836 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4837 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4838 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4839 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4840 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4841 PetscFunctionReturn(0); 4842 } 4843 4844 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4845 { 4846 PetscErrorCode ierr; 4847 Mat B_mpi; 4848 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4849 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4850 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4851 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4852 PetscInt len,proc,*dnz,*onz,bs,cbs; 4853 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4854 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4855 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4856 MPI_Status *status; 4857 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4858 PetscBT lnkbt; 4859 Mat_Merge_SeqsToMPI *merge; 4860 PetscContainer container; 4861 4862 PetscFunctionBegin; 4863 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4864 4865 /* make sure it is a PETSc comm */ 4866 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4867 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4868 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4869 4870 ierr = PetscNew(&merge);CHKERRQ(ierr); 4871 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4872 4873 /* determine row ownership */ 4874 /*---------------------------------------------------------*/ 4875 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4876 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4877 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4878 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4879 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4880 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4881 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4882 4883 m = merge->rowmap->n; 4884 owners = merge->rowmap->range; 4885 4886 /* determine the number of messages to send, their lengths */ 4887 /*---------------------------------------------------------*/ 4888 len_s = merge->len_s; 4889 4890 len = 0; /* length of buf_si[] */ 4891 merge->nsend = 0; 4892 for (proc=0; proc<size; proc++) { 4893 len_si[proc] = 0; 4894 if (proc == rank) { 4895 len_s[proc] = 0; 4896 } else { 4897 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4898 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4899 } 4900 if (len_s[proc]) { 4901 merge->nsend++; 4902 nrows = 0; 4903 for (i=owners[proc]; i<owners[proc+1]; i++) { 4904 if (ai[i+1] > ai[i]) nrows++; 4905 } 4906 len_si[proc] = 2*(nrows+1); 4907 len += len_si[proc]; 4908 } 4909 } 4910 4911 /* determine the number and length of messages to receive for ij-structure */ 4912 /*-------------------------------------------------------------------------*/ 4913 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4914 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4915 4916 /* post the Irecv of j-structure */ 4917 
/*-------------------------------*/ 4918 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4919 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4920 4921 /* post the Isend of j-structure */ 4922 /*--------------------------------*/ 4923 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4924 4925 for (proc=0, k=0; proc<size; proc++) { 4926 if (!len_s[proc]) continue; 4927 i = owners[proc]; 4928 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4929 k++; 4930 } 4931 4932 /* receives and sends of j-structure are complete */ 4933 /*------------------------------------------------*/ 4934 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4935 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4936 4937 /* send and recv i-structure */ 4938 /*---------------------------*/ 4939 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4940 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4941 4942 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4943 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4944 for (proc=0,k=0; proc<size; proc++) { 4945 if (!len_s[proc]) continue; 4946 /* form outgoing message for i-structure: 4947 buf_si[0]: nrows to be sent 4948 [1:nrows]: row index (global) 4949 [nrows+1:2*nrows+1]: i-structure index 4950 */ 4951 /*-------------------------------------------*/ 4952 nrows = len_si[proc]/2 - 1; 4953 buf_si_i = buf_si + nrows+1; 4954 buf_si[0] = nrows; 4955 buf_si_i[0] = 0; 4956 nrows = 0; 4957 for (i=owners[proc]; i<owners[proc+1]; i++) { 4958 anzi = ai[i+1] - ai[i]; 4959 if (anzi) { 4960 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4961 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4962 nrows++; 4963 } 4964 } 4965 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4966 k++; 4967 buf_si += len_si[proc]; 4968 } 4969 4970 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4971 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4972 4973 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4974 for (i=0; i<merge->nrecv; i++) { 4975 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4976 } 4977 4978 ierr = PetscFree(len_si);CHKERRQ(ierr); 4979 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4980 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4981 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4982 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4983 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4984 ierr = PetscFree(status);CHKERRQ(ierr); 4985 4986 /* compute a local seq matrix in each processor */ 4987 /*----------------------------------------------*/ 4988 /* allocate bi array and free space for accumulating nonzero column info */ 4989 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4990 bi[0] = 0; 4991 4992 /* create and initialize a linked list */ 4993 nlnk = N+1; 4994 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4995 4996 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4997 len = ai[owners[rank+1]] - ai[owners[rank]]; 4998 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4999 5000 current_space = free_space; 5001 5002 /* determine symbolic info for each local row */ 5003 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 5004 5005 for (k=0; k<merge->nrecv; k++) { 5006 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5007 nrows = *buf_ri_k[k]; 5008 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5009 nextai[k] = buf_ri_k[k] + (nrows + 1); /* 
poins to the next i-structure of k-th recved i-structure */ 5010 } 5011 5012 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5013 len = 0; 5014 for (i=0; i<m; i++) { 5015 bnzi = 0; 5016 /* add local non-zero cols of this proc's seqmat into lnk */ 5017 arow = owners[rank] + i; 5018 anzi = ai[arow+1] - ai[arow]; 5019 aj = a->j + ai[arow]; 5020 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5021 bnzi += nlnk; 5022 /* add received col data into lnk */ 5023 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5024 if (i == *nextrow[k]) { /* i-th row */ 5025 anzi = *(nextai[k]+1) - *nextai[k]; 5026 aj = buf_rj[k] + *nextai[k]; 5027 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5028 bnzi += nlnk; 5029 nextrow[k]++; nextai[k]++; 5030 } 5031 } 5032 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5033 5034 /* if free space is not available, make more free space */ 5035 if (current_space->local_remaining<bnzi) { 5036 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 5037 nspacedouble++; 5038 } 5039 /* copy data into free space, then initialize lnk */ 5040 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5041 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5042 5043 current_space->array += bnzi; 5044 current_space->local_used += bnzi; 5045 current_space->local_remaining -= bnzi; 5046 5047 bi[i+1] = bi[i] + bnzi; 5048 } 5049 5050 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5051 5052 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5053 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5054 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5055 5056 /* create symbolic parallel matrix B_mpi */ 5057 /*---------------------------------------*/ 5058 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5059 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5060 if 
(n==PETSC_DECIDE) { 5061 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5062 } else { 5063 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5064 } 5065 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5066 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5067 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5068 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5069 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5070 5071 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5072 B_mpi->assembled = PETSC_FALSE; 5073 merge->bi = bi; 5074 merge->bj = bj; 5075 merge->buf_ri = buf_ri; 5076 merge->buf_rj = buf_rj; 5077 merge->coi = NULL; 5078 merge->coj = NULL; 5079 merge->owners_co = NULL; 5080 5081 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5082 5083 /* attach the supporting struct to B_mpi for reuse */ 5084 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5085 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5086 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 5087 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5088 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5089 *mpimat = B_mpi; 5090 5091 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5092 PetscFunctionReturn(0); 5093 } 5094 5095 /*@C 5096 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5097 matrices from each processor 5098 5099 Collective 5100 5101 Input Parameters: 5102 + comm - the communicators the parallel matrix will live on 5103 . seqmat - the input sequential matrices 5104 . m - number of local rows (or PETSC_DECIDE) 5105 . n - number of local columns (or PETSC_DECIDE) 5106 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5107 5108 Output Parameter: 5109 . 
mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    nproc;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&nproc);CHKERRQ(ierr);

  /* the whole operation, whichever path is taken, is logged as one MAT_Seqstompi event */
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (nproc == 1) {
    /* one process: nothing to merge, so duplicate seqmat or copy its values into the existing result */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    /* several processes: the symbolic phase runs only for a new matrix, the numeric phase always */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscMPIInt    size;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* single process: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr   = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }

  /* reject an unsupported MatReuse before the log event is started so the event is never left open */
  if (scall != MAT_INITIAL_MATRIX && scall != MAT_REUSE_MATRIX) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a    = (Mat_SeqAIJ*)(mpimat->A)->data;
  b    = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai   = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa   = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers: each local row holds its diagonal-block plus off-diagonal-block entries */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns whose global index is below cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local column indices are shifted by cstart */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: the remaining columns of this row */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else {
    /* MAT_REUSE_MATRIX: only the values are rewritten, walking the rows in the same
       order (low off-diagonal, diagonal, high off-diagonal) as the initial construction */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated 5275 5276 Level: developer 5277 5278 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5279 5280 @*/ 5281 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5282 { 5283 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5284 PetscErrorCode ierr; 5285 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5286 IS isrowa,iscola; 5287 Mat *aloc; 5288 PetscBool match; 5289 5290 PetscFunctionBegin; 5291 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5292 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5293 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5294 if (!row) { 5295 start = A->rmap->rstart; end = A->rmap->rend; 5296 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5297 } else { 5298 isrowa = *row; 5299 } 5300 if (!col) { 5301 start = A->cmap->rstart; 5302 cmap = a->garray; 5303 nzA = a->A->cmap->n; 5304 nzB = a->B->cmap->n; 5305 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5306 ncols = 0; 5307 for (i=0; i<nzB; i++) { 5308 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5309 else break; 5310 } 5311 imark = i; 5312 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5313 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5314 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5315 } else { 5316 iscola = *col; 5317 } 5318 if (scall != MAT_INITIAL_MATRIX) { 5319 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5320 aloc[0] = *A_loc; 5321 } 5322 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5323 if (!col) { /* attach global id of condensed columns */ 5324 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5325 } 5326 *A_loc = aloc[0]; 5327 ierr = PetscFree(aloc);CHKERRQ(ierr); 5328 if (!row) { 5329 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5330 } 5331 if (!col) { 5332 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5333 } 5334 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5335 PetscFunctionReturn(0); 5336 } 5337 5338 /* 5339 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5340 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5341 * on a global size. 5342 * */ 5343 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5344 { 5345 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5346 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5347 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5348 PetscMPIInt owner; 5349 PetscSFNode *iremote,*oiremote; 5350 const PetscInt *lrowindices; 5351 PetscErrorCode ierr; 5352 PetscSF sf,osf; 5353 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5354 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5355 MPI_Comm comm; 5356 ISLocalToGlobalMapping mapping; 5357 5358 PetscFunctionBegin; 5359 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5360 /* plocalsize is the number of roots 5361 * nrows is the number of leaves 5362 * */ 5363 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5364 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5365 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5366 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5367 for (i=0;i<nrows;i++) { 5368 /* Find a remote index and an owner for a row 5369 * The row could be local or remote 5370 * */ 5371 owner = 0; 5372 lidx = 0; 5373 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5374 iremote[i].index = lidx; 5375 iremote[i].rank = owner; 5376 } 5377 /* Create SF to communicate how many nonzero columns for each row */ 5378 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5379 /* SF will figure out the number of 
nonzero colunms for each row, and their 5380 * offsets 5381 * */ 5382 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5383 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5384 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5385 5386 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5387 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5388 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5389 roffsets[0] = 0; 5390 roffsets[1] = 0; 5391 for (i=0;i<plocalsize;i++) { 5392 /* diag */ 5393 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5394 /* off diag */ 5395 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5396 /* compute offsets so that we relative location for each row */ 5397 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5398 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5399 } 5400 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5401 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5402 /* 'r' means root, and 'l' means leaf */ 5403 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5404 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5405 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5406 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5407 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5408 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5409 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5410 dntotalcols = 0; 5411 ontotalcols = 0; 5412 ncol = 0; 5413 for (i=0;i<nrows;i++) { 5414 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5415 ncol = PetscMax(pnnz[i],ncol); 5416 /* diag */ 5417 dntotalcols += nlcols[i*2+0]; 5418 /* off diag */ 5419 ontotalcols += nlcols[i*2+1]; 5420 } 5421 /* We do not need to figure the right number of columns 5422 * since all the calculations will be done by going through the raw data 5423 * */ 5424 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5425 ierr = 
MatSetUp(*P_oth);CHKERRQ(ierr); 5426 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5427 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5428 /* diag */ 5429 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5430 /* off diag */ 5431 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5432 /* diag */ 5433 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5434 /* off diag */ 5435 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5436 dntotalcols = 0; 5437 ontotalcols = 0; 5438 ntotalcols = 0; 5439 for (i=0;i<nrows;i++) { 5440 owner = 0; 5441 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5442 /* Set iremote for diag matrix */ 5443 for (j=0;j<nlcols[i*2+0];j++) { 5444 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5445 iremote[dntotalcols].rank = owner; 5446 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5447 ilocal[dntotalcols++] = ntotalcols++; 5448 } 5449 /* off diag */ 5450 for (j=0;j<nlcols[i*2+1];j++) { 5451 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5452 oiremote[ontotalcols].rank = owner; 5453 oilocal[ontotalcols++] = ntotalcols++; 5454 } 5455 } 5456 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5457 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5458 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5459 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5460 /* P serves as roots and P_oth is leaves 5461 * Diag matrix 5462 * */ 5463 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5464 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5465 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5466 5467 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5468 /* Off diag */ 5469 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5470 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5471 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5472 /* We operate on the matrix 
internal data for saving memory */ 5473 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5474 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5475 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5476 /* Convert to global indices for diag matrix */ 5477 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5478 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5479 /* We want P_oth store global indices */ 5480 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5481 /* Use memory scalable approach */ 5482 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5483 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5484 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5485 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5486 /* Convert back to local indices */ 5487 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5488 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5489 nout = 0; 5490 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5491 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5492 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5493 /* Exchange values */ 5494 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5495 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5496 /* Stop PETSc from shrinking memory */ 5497 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5498 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5499 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5500 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5501 ierr = 
PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5502 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5503 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5504 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5505 PetscFunctionReturn(0); 5506 } 5507 5508 /* 5509 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5510 * This supports MPIAIJ and MAIJ 5511 * */ 5512 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5513 { 5514 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5515 Mat_SeqAIJ *p_oth; 5516 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5517 IS rows,map; 5518 PetscHMapI hamp; 5519 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5520 MPI_Comm comm; 5521 PetscSF sf,osf; 5522 PetscBool has; 5523 PetscErrorCode ierr; 5524 5525 PetscFunctionBegin; 5526 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5527 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5528 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5529 * and then create a submatrix (that often is an overlapping matrix) 5530 * */ 5531 if (reuse == MAT_INITIAL_MATRIX) { 5532 /* Use a hash table to figure out unique keys */ 5533 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5534 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5535 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5536 count = 0; 5537 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5538 for (i=0;i<a->B->cmap->n;i++) { 5539 key = a->garray[i]/dof; 5540 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5541 if (!has) { 5542 mapping[i] = count; 5543 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5544 } else { 5545 /* Current 'i' has the same value the previous step */ 5546 mapping[i] = count-1; 5547 } 5548 } 5549 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5550 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5551 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5552 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5553 off = 0; 5554 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5555 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5556 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5557 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5558 /* In case, the matrix was already created but users want to recreate the matrix */ 5559 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5560 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5561 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5562 ierr = ISDestroy(&map);CHKERRQ(ierr); 5563 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5564 } else if (reuse == MAT_REUSE_MATRIX) { 5565 /* If matrix was already created, we simply update values using SF objects 5566 * that as attached to the matrix ealier. 
5567 * */ 5568 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5569 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5570 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5571 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5572 /* Update values in place */ 5573 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5574 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5575 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5576 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5577 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5578 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5579 PetscFunctionReturn(0); 5580 } 5581 5582 /*@C 5583 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5584 5585 Collective on Mat 5586 5587 Input Parameters: 5588 + A,B - the matrices in mpiaij format 5589 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *rowidx,k,cstart,nidx,ndiag,noff,*garray,nbelow;
  IS             isrowb,iscolb;
  Mat            *bseq = NULL;

  PetscFunctionBegin;
  /* the column layout of A has to coincide with the row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    cstart = A->cmap->rstart;
    garray = aij->garray;
    ndiag  = aij->A->cmap->n;
    noff   = aij->B->cmap->n;
    ierr   = PetscMalloc1(ndiag+noff,&rowidx);CHKERRQ(ierr);
    nidx   = 0;

    /* rows of B below the local range: the leading garray entries smaller than cstart */
    nbelow = 0;
    while (nbelow < noff && garray[nbelow] < cstart) {
      rowidx[nidx++] = garray[nbelow];
      nbelow++;
    }
    /* the local rows */
    for (k=0; k<ndiag; k++) rowidx[nidx++] = cstart + k;
    /* the rest of garray, taken in its stored order */
    for (k=nbelow; k<noff; k++) rowidx[nidx++] = garray[k];

    ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,rowidx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    /* all columns of B are kept */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb;
    iscolb  = *colb;
    /* MatCreateSubMatrices() expects an array of matrices, so wrap the existing one */
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);

  /* hand each index set to the caller when a slot was provided, otherwise release it */
  if (rowb) {
    *rowb = isrowb;
  } else {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  }
  if (colb) {
    *colb = iscolb;
  } else {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5670 5671 Level: developer 5672 5673 */ 5674 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5675 { 5676 PetscErrorCode ierr; 5677 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5678 Mat_SeqAIJ *b_oth; 5679 VecScatter ctx; 5680 MPI_Comm comm; 5681 const PetscMPIInt *rprocs,*sprocs; 5682 const PetscInt *srow,*rstarts,*sstarts; 5683 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5684 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5685 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5686 MPI_Request *rwaits = NULL,*swaits = NULL; 5687 MPI_Status rstatus; 5688 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5689 5690 PetscFunctionBegin; 5691 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5692 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5693 5694 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5695 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5696 } 5697 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5698 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5699 5700 if (size == 1) { 5701 startsj_s = NULL; 5702 bufa_ptr = NULL; 5703 *B_oth = NULL; 5704 PetscFunctionReturn(0); 5705 } 5706 5707 ctx = a->Mvctx; 5708 tag = ((PetscObject)ctx)->tag; 5709 5710 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5711 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5712 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5713 ierr = 
VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5714 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5715 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5716 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5717 5718 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5719 if (scall == MAT_INITIAL_MATRIX) { 5720 /* i-array */ 5721 /*---------*/ 5722 /* post receives */ 5723 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5724 for (i=0; i<nrecvs; i++) { 5725 rowlen = rvalues + rstarts[i]*rbs; 5726 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5727 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5728 } 5729 5730 /* pack the outgoing message */ 5731 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5732 5733 sstartsj[0] = 0; 5734 rstartsj[0] = 0; 5735 len = 0; /* total length of j or a array to be sent */ 5736 if (nsends) { 5737 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5738 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5739 } 5740 for (i=0; i<nsends; i++) { 5741 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5742 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5743 for (j=0; j<nrows; j++) { 5744 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5745 for (l=0; l<sbs; l++) { 5746 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5747 5748 rowlen[j*sbs+l] = ncols; 5749 5750 len += ncols; 5751 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5752 } 5753 k++; 5754 } 5755 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5756 5757 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5758 } 5759 /* recvs and sends of i-array are completed */ 5760 i = nrecvs; 5761 while (i--) { 5762 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5763 } 5764 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5765 ierr = PetscFree(svalues);CHKERRQ(ierr); 5766 5767 /* allocate buffers for sending j and a arrays */ 5768 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5769 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5770 5771 /* create i-array of B_oth */ 5772 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5773 5774 b_othi[0] = 0; 5775 len = 0; /* total length of j or a array to be received */ 5776 k = 0; 5777 for (i=0; i<nrecvs; i++) { 5778 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5779 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5780 for (j=0; j<nrows; j++) { 5781 b_othi[k+1] = b_othi[k] + rowlen[j]; 5782 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5783 k++; 5784 } 5785 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5786 } 5787 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5788 5789 /* allocate space for j and a arrrays of B_oth */ 5790 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5791 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5792 5793 /* j-array */ 5794 /*---------*/ 5795 /* post receives of j-array */ 5796 for (i=0; i<nrecvs; i++) { 5797 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5798 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5799 } 5800 5801 /* pack the outgoing message j-array */ 5802 if (nsends) k = sstarts[0]; 5803 for (i=0; i<nsends; i++) { 5804 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5805 bufJ = bufj+sstartsj[i]; 5806 for (j=0; j<nrows; j++) { 5807 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5808 for (ll=0; ll<sbs; ll++) { 5809 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5810 for (l=0; l<ncols; l++) { 5811 *bufJ++ = cols[l]; 5812 } 5813 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5814 } 5815 } 5816 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5817 } 5818 5819 /* recvs and sends of j-array are completed */ 5820 i = nrecvs; 5821 while (i--) { 5822 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5823 } 5824 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5825 } else if (scall == MAT_REUSE_MATRIX) { 5826 sstartsj = *startsj_s; 5827 rstartsj = *startsj_r; 5828 bufa = *bufa_ptr; 5829 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5830 b_otha = b_oth->a; 5831 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5832 5833 /* a-array */ 5834 /*---------*/ 5835 /* post receives of a-array */ 5836 for (i=0; i<nrecvs; i++) { 5837 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5838 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5839 } 5840 5841 /* pack the outgoing message a-array */ 5842 if (nsends) k = sstarts[0]; 5843 for (i=0; i<nsends; i++) { 5844 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5845 bufA = bufa+sstartsj[i]; 5846 for (j=0; j<nrows; j++) { 5847 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5848 for (ll=0; ll<sbs; ll++) { 5849 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5850 for (l=0; l<ncols; l++) { 5851 *bufA++ = vals[l]; 5852 } 5853 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5854 } 5855 } 5856 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5857 } 5858 /* recvs and sends of a-array are completed */ 5859 i = nrecvs; 5860 while (i--) { 5861 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5862 } 5863 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5864 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5865 5866 if (scall == MAT_INITIAL_MATRIX) { 5867 /* put together the new matrix */ 5868 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5869 5870 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5871 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5872 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5873 b_oth->free_a = PETSC_TRUE; 5874 b_oth->free_ij = PETSC_TRUE; 5875 b_oth->nonew = 0; 5876 5877 ierr = PetscFree(bufj);CHKERRQ(ierr); 5878 if (!startsj_s || !bufa_ptr) { 5879 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5880 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5881 } else { 5882 *startsj_s = sstartsj; 5883 *startsj_r = rstartsj; 5884 *bufa_ptr = bufa; 5885 } 5886 } 5887 5888 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5889 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5890 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5891 PetscFunctionReturn(0); 5892 } 5893 5894 /*@C 5895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5896 5897 Not Collective 5898 5899 Input Parameters: 5900 . A - The matrix in mpiaij format 5901 5902 Output Parameter: 5903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5904 . 
colmap - A map from global column index to local index into lvec 5905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5906 5907 Level: developer 5908 5909 @*/ 5910 #if defined(PETSC_USE_CTABLE) 5911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5912 #else 5913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5914 #endif 5915 { 5916 Mat_MPIAIJ *a; 5917 5918 PetscFunctionBegin; 5919 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5920 PetscValidPointer(lvec, 2); 5921 PetscValidPointer(colmap, 3); 5922 PetscValidPointer(multScatter, 4); 5923 a = (Mat_MPIAIJ*) A->data; 5924 if (lvec) *lvec = a->lvec; 5925 if (colmap) *colmap = a->colmap; 5926 if (multScatter) *multScatter = a->Mvctx; 5927 PetscFunctionReturn(0); 5928 } 5929 5930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5933 #if defined(PETSC_HAVE_MKL_SPARSE) 5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5935 #endif 5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5938 #if defined(PETSC_HAVE_ELEMENTAL) 5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5940 #endif 5941 #if defined(PETSC_HAVE_SCALAPACK) 5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5943 #endif 5944 #if defined(PETSC_HAVE_HYPRE) 5945 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5946 #endif 5947 #if defined(PETSC_HAVE_CUDA) 5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5949 
#endif 5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5951 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5952 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5953 5954 /* 5955 Computes (B'*A')' since computing B*A directly is untenable 5956 5957 n p p 5958 [ ] [ ] [ ] 5959 m [ A ] * n [ B ] = m [ C ] 5960 [ ] [ ] [ ] 5961 5962 */ 5963 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5964 { 5965 PetscErrorCode ierr; 5966 Mat At,Bt,Ct; 5967 5968 PetscFunctionBegin; 5969 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5970 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5971 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5972 ierr = MatDestroy(&At);CHKERRQ(ierr); 5973 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5974 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5975 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5976 PetscFunctionReturn(0); 5977 } 5978 5979 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5980 { 5981 PetscErrorCode ierr; 5982 PetscBool cisdense; 5983 5984 PetscFunctionBegin; 5985 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5986 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5987 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5988 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5989 if (!cisdense) { 5990 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5991 } 5992 ierr = MatSetUp(C);CHKERRQ(ierr); 5993 5994 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5995 PetscFunctionReturn(0); 5996 } 5997 5998 /* ----------------------------------------------------------------*/ 5999 static PetscErrorCode 
MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6000 { 6001 Mat_Product *product = C->product; 6002 Mat A = product->A,B=product->B; 6003 6004 PetscFunctionBegin; 6005 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6006 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6007 6008 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6009 C->ops->productsymbolic = MatProductSymbolic_AB; 6010 PetscFunctionReturn(0); 6011 } 6012 6013 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6014 { 6015 PetscErrorCode ierr; 6016 Mat_Product *product = C->product; 6017 6018 PetscFunctionBegin; 6019 if (product->type == MATPRODUCT_AB) { 6020 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 6021 } 6022 PetscFunctionReturn(0); 6023 } 6024 /* ----------------------------------------------------------------*/ 6025 6026 /*MC 6027 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6028 6029 Options Database Keys: 6030 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6031 6032 Level: beginner 6033 6034 Notes: 6035 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6036 in this case the values associated with the rows and columns one passes in are set to zero 6037 in the matrix 6038 6039 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6040 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6041 6042 .seealso: MatCreateAIJ() 6043 M*/ 6044 6045 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6046 { 6047 Mat_MPIAIJ *b; 6048 PetscErrorCode ierr; 6049 PetscMPIInt size; 6050 6051 PetscFunctionBegin; 6052 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 6053 6054 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6055 B->data = (void*)b; 6056 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6057 B->assembled = PETSC_FALSE; 6058 B->insertmode = NOT_SET_VALUES; 6059 b->size = size; 6060 6061 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 6062 6063 /* build cache for off array entries formed */ 6064 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6065 6066 b->donotstash = PETSC_FALSE; 6067 b->colmap = NULL; 6068 b->garray = NULL; 6069 b->roworiented = PETSC_TRUE; 6070 6071 /* stuff used for matrix vector multiply */ 6072 b->lvec = NULL; 6073 b->Mvctx = NULL; 6074 6075 /* stuff for MatGetRow() */ 6076 b->rowindices = NULL; 6077 b->rowvalues = NULL; 6078 b->getrowactive = PETSC_FALSE; 6079 6080 /* flexible pointer used in CUSP/CUSPARSE classes */ 6081 b->spptr = NULL; 6082 6083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6088 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6093 #if defined(PETSC_HAVE_MKL_SPARSE) 6094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6095 #endif 6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6099 #if defined(PETSC_HAVE_ELEMENTAL) 6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6101 #endif 6102 #if defined(PETSC_HAVE_SCALAPACK) 6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 6104 #endif 6105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6107 #if defined(PETSC_HAVE_HYPRE) 6108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6109 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6110 #endif 6111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 6113 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6114 PetscFunctionReturn(0); 6115 } 6116 6117 /*@C 6118 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6119 and "off-diagonal" part of the matrix in CSR format. 6120 6121 Collective 6122 6123 Input Parameters: 6124 + comm - MPI communicator 6125 . m - number of local rows (Cannot be PETSC_DECIDE) 6126 . n - This value should be the same as the local size used in creating the 6127 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6128 calculated if N is given) For square matrices n is almost always m. 6129 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6130 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6131 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6132 . j - column indices 6133 . a - matrix values 6134 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6135 . oj - column indices 6136 - oa - matrix values 6137 6138 Output Parameter: 6139 . mat - the matrix 6140 6141 Level: advanced 6142 6143 Notes: 6144 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6145 must free the arrays once the matrix has been destroyed and not before. 
6146 6147 The i and j indices are 0 based 6148 6149 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6150 6151 This sets local rows and cannot be used to set off-processor values. 6152 6153 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6154 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6155 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6156 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6157 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6158 communication if it is known that only local entries will be set. 6159 6160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6161 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6162 @*/ 6163 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6164 { 6165 PetscErrorCode ierr; 6166 Mat_MPIAIJ *maij; 6167 6168 PetscFunctionBegin; 6169 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6170 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6171 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6172 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6173 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6174 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6175 maij = (Mat_MPIAIJ*) (*mat)->data; 6176 6177 (*mat)->preallocated = PETSC_TRUE; 6178 6179 ierr = 
PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6180 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6181 6182 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6183 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6184 6185 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6186 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6187 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6188 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6189 6190 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6191 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6192 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6193 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6194 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6195 PetscFunctionReturn(0); 6196 } 6197 6198 /* 6199 Special version for direct calls from Fortran 6200 */ 6201 #include <petsc/private/fortranimpl.h> 6202 6203 /* Change these macros so can be used in void function */ 6204 #undef CHKERRQ 6205 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6206 #undef SETERRQ2 6207 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6208 #undef SETERRQ3 6209 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6210 #undef SETERRQ 6211 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6212 6213 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6214 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6215 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6216 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6217 #else 6218 #endif 6219 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6220 { 6221 Mat mat = *mmat; 6222 PetscInt m = *mm, n = *mn; 6223 
InsertMode addv = *maddv; 6224 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6225 PetscScalar value; 6226 PetscErrorCode ierr; 6227 6228 MatCheckPreallocated(mat,1); 6229 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6230 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6231 { 6232 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6233 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6234 PetscBool roworiented = aij->roworiented; 6235 6236 /* Some Variables required in the macro */ 6237 Mat A = aij->A; 6238 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6239 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6240 MatScalar *aa = a->a; 6241 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6242 Mat B = aij->B; 6243 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6244 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6245 MatScalar *ba = b->a; 6246 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6247 * cannot use "#if defined" inside a macro. 
*/ 6248 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6249 6250 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6251 PetscInt nonew = a->nonew; 6252 MatScalar *ap1,*ap2; 6253 6254 PetscFunctionBegin; 6255 for (i=0; i<m; i++) { 6256 if (im[i] < 0) continue; 6257 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6258 if (im[i] >= rstart && im[i] < rend) { 6259 row = im[i] - rstart; 6260 lastcol1 = -1; 6261 rp1 = aj + ai[row]; 6262 ap1 = aa + ai[row]; 6263 rmax1 = aimax[row]; 6264 nrow1 = ailen[row]; 6265 low1 = 0; 6266 high1 = nrow1; 6267 lastcol2 = -1; 6268 rp2 = bj + bi[row]; 6269 ap2 = ba + bi[row]; 6270 rmax2 = bimax[row]; 6271 nrow2 = bilen[row]; 6272 low2 = 0; 6273 high2 = nrow2; 6274 6275 for (j=0; j<n; j++) { 6276 if (roworiented) value = v[i*n+j]; 6277 else value = v[i+j*m]; 6278 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6279 if (in[j] >= cstart && in[j] < cend) { 6280 col = in[j] - cstart; 6281 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6282 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6283 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6284 #endif 6285 } else if (in[j] < 0) continue; 6286 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6287 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6288 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6289 } else { 6290 if (mat->was_assembled) { 6291 if (!aij->colmap) { 6292 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6293 } 6294 #if defined(PETSC_USE_CTABLE) 6295 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6296 col--; 6297 #else 6298 col = aij->colmap[in[j]] - 1; 6299 #endif 6300 if (col < 
0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6301 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6302 col = in[j]; 6303 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6304 B = aij->B; 6305 b = (Mat_SeqAIJ*)B->data; 6306 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6307 rp2 = bj + bi[row]; 6308 ap2 = ba + bi[row]; 6309 rmax2 = bimax[row]; 6310 nrow2 = bilen[row]; 6311 low2 = 0; 6312 high2 = nrow2; 6313 bm = aij->B->rmap->n; 6314 ba = b->a; 6315 inserted = PETSC_FALSE; 6316 } 6317 } else col = in[j]; 6318 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6319 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6320 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6321 #endif 6322 } 6323 } 6324 } else if (!aij->donotstash) { 6325 if (roworiented) { 6326 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6327 } else { 6328 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6329 } 6330 } 6331 } 6332 } 6333 PetscFunctionReturnVoid(); 6334 } 6335