1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = NULL; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 
for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type 
== NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if (type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = 
MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column 
indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 
383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 
426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = 
PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; \ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal 
part */ 534 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. 
*/ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by 
MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 658 } else { 659 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 660 } 661 } 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 /* 668 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 669 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
670 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 671 */ 672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 673 { 674 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 675 Mat A = aij->A; /* diagonal part of the matrix */ 676 Mat B = aij->B; /* offdiagonal part of the matrix */ 677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 678 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 679 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 680 PetscInt *ailen = a->ilen,*aj = a->j; 681 PetscInt *bilen = b->ilen,*bj = b->j; 682 PetscInt am = aij->A->rmap->n,j; 683 PetscInt diag_so_far = 0,dnz; 684 PetscInt offd_so_far = 0,onz; 685 686 PetscFunctionBegin; 687 /* Iterate over all rows of the matrix */ 688 for (j=0; j<am; j++) { 689 dnz = onz = 0; 690 /* Iterate over all non-zero columns of the current row */ 691 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 692 /* If column is in the diagonal */ 693 if (mat_j[col] >= cstart && mat_j[col] < cend) { 694 aj[diag_so_far++] = mat_j[col] - cstart; 695 dnz++; 696 } else { /* off-diagonal entries */ 697 bj[offd_so_far++] = mat_j[col]; 698 onz++; 699 } 700 } 701 ailen[j] = dnz; 702 bilen[j] = onz; 703 } 704 PetscFunctionReturn(0); 705 } 706 707 /* 708 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 709 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 710 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 711 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 712 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
713 */ 714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 715 { 716 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 717 Mat A = aij->A; /* diagonal part of the matrix */ 718 Mat B = aij->B; /* offdiagonal part of the matrix */ 719 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 720 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 721 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 722 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 723 PetscInt *ailen = a->ilen,*aj = a->j; 724 PetscInt *bilen = b->ilen,*bj = b->j; 725 PetscInt am = aij->A->rmap->n,j; 726 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = 
mat->cmap->rend,row,col;

  /*
     NOTE(review): the header and first declarations of this routine lie above this chunk.
     The visible loop fills v (row-major, entry (i,j) at v[i*n+j]) with the values at global
     rows idxm[] and global columns idxn[]. Negative indices are skipped, rows outside
     [rstart,rend) raise PETSC_ERR_SUP.
  */
  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in [cstart,cend): read from the diagonal block aij->A */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* other columns: translate the global column to a column of aij->B through colmap (built on first use) */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (or a garray mismatch) means the column is not stored in aij->B: the entry is reported as 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

/*
   MatAssemblyBegin_MPIAIJ - Starts the scatter of the stashed entries of mat.

   Returns immediately when aij->donotstash or mat->nooffprocentries is set. Otherwise it
   begins the stash scatter using the row ownership ranges and logs the stash statistics
   through PetscInfo2(). The mode argument is not used here.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd_MPIAIJ - Completes assembly of mat.

   Steps, in order:
     1. unless stashing is disabled, drain the stash messages and insert each run of
        same-row entries with one MatSetValues_MPIAIJ() call;
     2. assemble the block aij->A;
     3. if aij->B may still gain nonzeros (nonew not set), reduce was_assembled over the
        communicator and disassemble locally when some other process has disassembled;
     4. on the first final assembly call MatSetUpMultiply_MPIAIJ();
     5. assemble aij->B with inodes disabled;
     6. free the row work arrays and the cached diagonal vector;
     7. refresh mat->nonzerostate as the sum over all processes of the two block states.

   The calls to MPIU_Allreduce() are collective, so the control flow reaching them must be
   the same on every process of the communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      /* flg == 0 signals that no further stash message is available */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD over the was_assembled flags: the result is false as soon as one process is not assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  /* drop the cached diagonal vector; MatSOR_MPIAIJ() recreates it when it finds it missing */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero 
diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input 
rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = 
MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if (PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 
PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 VecScatter Mvctx = a->Mvctx; 1135 1136 PetscFunctionBegin; 1137 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1138 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 
PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* do local part */ 1154 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1155 /* add partial results together */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1162 { 1163 MPI_Comm comm; 1164 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1165 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1166 IS Me,Notme; 1167 PetscErrorCode ierr; 1168 PetscInt M,N,first,last,*notme,i; 1169 PetscBool lf; 1170 PetscMPIInt size; 1171 1172 PetscFunctionBegin; 1173 /* Easy test: symmetric diagonal block */ 1174 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1175 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1176 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1177 if (!*f) PetscFunctionReturn(0); 1178 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1180 if (size == 1) PetscFunctionReturn(0); 1181 1182 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1183 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1184 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1186 for (i=0; i<first; i++) notme[i] = i; 1187 for (i=last; i<M; i++) notme[i-last+first] = i; 1188 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1189 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1190 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1191 Aoff = Aoffs[0]; 1192 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1193 Boff = Boffs[0]; 1194 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1195 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1197 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1199 ierr = PetscFree(notme);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1213 { 1214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* do nondiagonal part */ 1219 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1220 /* do local part */ 1221 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1222 /* add partial results together */ 1223 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is 
the 1230 diagonal block 1231 */ 1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1233 { 1234 PetscErrorCode ierr; 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 1237 PetscFunctionBegin; 1238 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1239 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1245 { 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1251 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1256 { 1257 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 #if defined(PETSC_USE_LOG) 1262 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1263 #endif 1264 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1266 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1268 #if defined(PETSC_USE_CTABLE) 1269 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1270 #else 1271 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1272 #endif 1273 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1274 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1275 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1276 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1277 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1278 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1279 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1280 1281 /* may be created by 
MatCreateMPIAIJSumSeqAIJSymbolic */ 1282 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1283 1284 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1294 #if defined(PETSC_HAVE_ELEMENTAL) 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1296 #endif 1297 #if defined(PETSC_HAVE_SCALAPACK) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 const PetscInt *garray = aij->garray; 1316 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1317 PetscInt *rowlens; 1318 PetscInt *colidxs; 1319 PetscScalar *matvals; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1324 1325 M = mat->rmap->N; 1326 N = mat->cmap->N; 1327 m = mat->rmap->n; 1328 rs = mat->rmap->rstart; 1329 cs = mat->cmap->rstart; 1330 nz = A->nz + B->nz; 1331 1332 /* write matrix header */ 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = M; header[2] = N; header[3] = nz; 1335 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1337 1338 /* fill in and store row lengths */ 1339 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1340 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1341 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1342 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1343 1344 /* fill in and store column indices */ 1345 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1346 for (cnt=0, i=0; i<m; i++) { 1347 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1348 if (garray[B->j[jb]] > cs) break; 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1352 colidxs[cnt++] = A->j[ja] + cs; 1353 for (; jb<B->i[i+1]; jb++) 1354 colidxs[cnt++] = garray[B->j[jb]]; 1355 } 1356 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1357 ierr = 
PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1358 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1359 1360 /* fill in and store nonzero values */ 1361 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1362 for (cnt=0, i=0; i<m; i++) { 1363 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1364 if (garray[B->j[jb]] > cs) break; 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1368 matvals[cnt++] = A->a[ja]; 1369 for (; jb<B->i[i+1]; jb++) 1370 matvals[cnt++] = B->a[jb]; 1371 } 1372 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1373 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1374 ierr = PetscFree(matvals);CHKERRQ(ierr); 1375 1376 /* write block size option to the viewer's .info file */ 1377 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #include <petscdraw.h> 1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1383 { 1384 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1385 PetscErrorCode ierr; 1386 PetscMPIInt rank = aij->rank,size = aij->size; 1387 PetscBool isdraw,iascii,isbinary; 1388 PetscViewer sviewer; 1389 PetscViewerFormat format; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1394 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 if (iascii) { 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1398 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1399 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1400 ierr = 
MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1401 for (i=0; i<(PetscInt)size; i++) { 1402 nmax = PetscMax(nmax,nz[i]); 1403 nmin = PetscMin(nmin,nz[i]); 1404 navg += nz[i]; 1405 } 1406 ierr = PetscFree(nz);CHKERRQ(ierr); 1407 navg = navg/size; 1408 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1412 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1413 MatInfo info; 1414 PetscBool inodes; 1415 1416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1417 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1418 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1420 if (!inodes) { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1425 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1426 } 1427 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPrintf(viewer,"Information 
on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1434 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1437 PetscInt inodecount,inodelimit,*inodes; 1438 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1439 if (inodes) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1441 } else { 1442 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1446 PetscFunctionReturn(0); 1447 } 1448 } else if (isbinary) { 1449 if (size == 1) { 1450 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1451 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } else if (iascii && size == 1) { 1457 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1458 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } else if (isdraw) { 1461 PetscDraw draw; 1462 PetscBool isnull; 1463 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1464 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1465 if (isnull) PetscFunctionReturn(0); 1466 } 1467 1468 { /* assemble the entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = NULL; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr 
= VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 }

/*
   MatPermute_MPIAIJ - Creates in *B a new assembled AIJ matrix that holds the entries of A
   with rows and columns relocated according to rowp and colp.

   Input Parameters:
+  A    - the matrix to permute
.  rowp - row index set; its local entries are used as the remote roots of a star forest laid out over A->rmap
-  colp - column index set; used the same way over A->cmap

   Output Parameter:
.  B - the new matrix, created with the same local and global sizes as A

   Notes:
   rowp and colp are only read (ISGetIndices/ISRestoreIndices); they are not destroyed here.
   The value of "done" reported by MatGetRowIJ() is not inspected.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go:
     every leaf sends its own global row number to the root named by rwant[] */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go: fetch cdest[] from the owner of every off-diagonal column */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count, for every local row, how many permuted entries land in the diagonal (dnnz)
     and off-diagonal (onnz) block of the process that will own the permuted row */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Move the per-row counts along the row star forest into tdnnz/tonnz, which preallocate Aperm */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  *B = Aperm;
  PetscFunctionReturn(0);
}

/*
   MatGetGhosts_MPIAIJ - Reports the columns of the off-diagonal block.

   Output Parameters:
+  nghosts - set to the number of columns of the off-diagonal block aij->B
-  ghosts  - if non-NULL, set to point at aij->garray (not a copy; the caller must not free it)
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781
info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt 
**idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = NULL; pvB = NULL;} 1874 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; 
i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = NULL; 1912 if (v) *v = NULL; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm 
*/ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 }

/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   Input Parameters:
+  A     - the matrix to transpose
-  reuse - MAT_INITIAL_MATRIX creates a new matrix; with any other value the matrix in *matout
           is filled, unless *matout is A itself, in which case a new matrix is built and
           merged into A's header at the end

   Output Parameter:
.  matout - the transpose (not written in the in-place case)

   Notes:
   When a new matrix is built it is preallocated exactly: diagonal-block counts come from the
   column indices of A's diagonal block, off-diagonal counts are summed onto the owning
   processes of A's off-diagonal columns with a star forest.

   The diagonal block is transposed directly with MatTranspose(); the off-diagonal block is
   inserted with MatSetValues(), one row of A (= one column of B) at a time.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  PetscErrorCode  ierr;
  Mat             B,A_diag,*B_diag;
  const MatScalar *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt *d_nnz,*g_nnz,*o_nnz;
    PetscSF  sf;

    ierr = PetscMalloc3(na,&d_nnz,na,&o_nnz,nb,&g_nnz);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree3(d_nnz,o_nnz,g_nnz);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of A's off-diagonal block becomes (part of) column "row" of B */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation.
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 }

/* MatSetUnfactored_MPIAIJ - Forwards the request to the diagonal block; the off-diagonal block is not touched. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(mpiaij->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Collective equality test of two parallel AIJ matrices.

   Each process compares its diagonal blocks and, only if those agree, its off-diagonal blocks;
   *flag is the logical AND of the local results over A's communicator.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *rhs = (Mat_MPIAIJ*)B->data,*lhs = (Mat_MPIAIJ*)A->data;
  PetscBool      same_here;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatEqual(lhs->A,rhs->A,&same_here);CHKERRQ(ierr);
  if (same_here) {
    ierr = MatEqual(lhs->B,rhs->B,&same_here);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&same_here,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B.

   The block-wise fast path is taken only when the caller promises SAME_NONZERO_PATTERN and
   both matrices share the same copy implementation; everything else goes through
   MatCopy_Basic(). B's object state is increased in either case.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *src = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *dst = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN && A->ops->copy == B->ops->copy) {
    ierr = MatCopy(src->A,dst->A,str);CHKERRQ(ierr);
    ierr = MatCopy(src->B,dst->B,str);CHKERRQ(ierr);
  } else {
    /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.

       because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatSetUp_MPIAIJ - Default setup: preallocates with PETSC_DEFAULT counts and no per-row arrays. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure.
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2266 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 
PetscFunctionReturn(0); 2280 } 2281 2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2283 { 2284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2289 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,*idxb = NULL; 2309 PetscScalar *va,*vb; 2310 Vec vtmp; 2311 2312 PetscFunctionBegin; 2313 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2314 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2315 if (idx) { 2316 for (i=0; i<A->rmap->n; i++) { 2317 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2318 } 2319 } 2320 2321 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2322 if (idx) { 2323 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2324 } 2325 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2326 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2327 2328 for (i=0; i<A->rmap->n; i++) { 2329 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2330 va[i] = vb[i]; 2331 if (idx) idx[i] = a->garray[idxb[i]]; 2332 } 2333 } 2334 2335 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2336 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2337 ierr = PetscFree(idxb);CHKERRQ(ierr); 2338 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2343 { 2344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2345 PetscErrorCode ierr; 2346 PetscInt i,*idxb = NULL; 2347 PetscScalar *va,*vb; 2348 Vec vtmp; 2349 
2350 PetscFunctionBegin; 2351 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2352 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2353 if (idx) { 2354 for (i=0; i<A->cmap->n; i++) { 2355 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2356 } 2357 } 2358 2359 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2360 if (idx) { 2361 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2362 } 2363 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2364 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2365 2366 for (i=0; i<A->rmap->n; i++) { 2367 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2368 va[i] = vb[i]; 2369 if (idx) idx[i] = a->garray[idxb[i]]; 2370 } 2371 } 2372 2373 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2374 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2375 ierr = PetscFree(idxb);CHKERRQ(ierr); 2376 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2381 { 2382 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2383 PetscInt n = A->rmap->n; 2384 PetscInt cstart = A->cmap->rstart; 2385 PetscInt *cmap = mat->garray; 2386 PetscInt *diagIdx, *offdiagIdx; 2387 Vec diagV, offdiagV; 2388 PetscScalar *a, *diagA, *offdiagA; 2389 PetscInt r; 2390 PetscErrorCode ierr; 2391 2392 PetscFunctionBegin; 2393 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2394 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2395 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2396 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2397 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2398 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2399 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2400 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2401 for (r = 0; r < n; ++r) { 2402 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2403 a[r] = diagA[r]; 2404 idx[r] 
= cstart + diagIdx[r]; 2405 } else { 2406 a[r] = offdiagA[r]; 2407 idx[r] = cmap[offdiagIdx[r]]; 2408 } 2409 } 2410 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2411 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2412 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2413 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2414 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2415 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2420 { 2421 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2422 PetscInt m = A->rmap->n,n = A->cmap->n; 2423 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2424 PetscInt *cmap = mat->garray; 2425 PetscInt *diagIdx, *offdiagIdx; 2426 Vec diagV, offdiagV; 2427 PetscScalar *a, *diagA, *offdiagA, *ba; 2428 PetscInt r,j,col,ncols,*bi,*bj; 2429 PetscErrorCode ierr; 2430 Mat B = mat->B; 2431 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2432 2433 PetscFunctionBegin; 2434 /* When a process holds entire A and other processes have no entry */ 2435 if (A->cmap->N == n) { 2436 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr); 2437 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr); 2438 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr); 2439 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2440 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr); 2441 PetscFunctionReturn(0); 2442 } else if (n == 0) { 2443 if (m) { 2444 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr); 2445 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2446 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr); 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 2451 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr); 2452 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr); 2453 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr); 2454 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2455 2456 /* Get 
offdiagIdx[] for implicit 0.0 */ 2457 ba = b->a; 2458 bi = b->i; 2459 bj = b->j; 2460 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr); 2461 for (r = 0; r < m; r++) { 2462 ncols = bi[r+1] - bi[r]; 2463 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2464 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2465 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2466 offdiagA[r] = 0.0; 2467 2468 /* Find first hole in the cmap */ 2469 for (j=0; j<ncols; j++) { 2470 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2471 if (col > j && j < cstart) { 2472 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2473 break; 2474 } else if (col > j + n && j >= cstart) { 2475 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2476 break; 2477 } 2478 } 2479 if (j == ncols && B->cmap->N < A->cmap->N - n) { 2480 /* a hole is outside compressed Bcols */ 2481 if (ncols == 0) { 2482 if (cstart) { 2483 offdiagIdx[r] = 0; 2484 } else offdiagIdx[r] = cend; 2485 } else { /* ncols > 0 */ 2486 offdiagIdx[r] = cmap[ncols-1] + 1; 2487 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2488 } 2489 } 2490 } 2491 2492 for (j=0; j<ncols; j++) { 2493 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2494 ba++; bj++; 2495 } 2496 } 2497 2498 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr); 2499 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr); 2500 for (r = 0; r < m; ++r) { 2501 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2502 a[r] = diagA[r]; 2503 if (idx) idx[r] = cstart + diagIdx[r]; 2504 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2505 a[r] = diagA[r]; 2506 if (idx) { 2507 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2508 idx[r] = cstart + diagIdx[r]; 2509 } else idx[r] = offdiagIdx[r]; 2510 } 2511 } else { 2512 a[r] = offdiagA[r]; 2513 if (idx) idx[r] = offdiagIdx[r]; 2514 } 2515 } 2516 ierr = 
VecRestoreArrayWrite(v, &a);CHKERRQ(ierr); 2517 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr); 2518 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr); 2519 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2520 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2521 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2526 { 2527 PetscErrorCode ierr; 2528 Mat *dummy; 2529 2530 PetscFunctionBegin; 2531 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2532 *newmat = *dummy; 2533 ierr = PetscFree(dummy);CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2538 { 2539 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2540 PetscErrorCode ierr; 2541 2542 PetscFunctionBegin; 2543 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2544 A->factorerrortype = a->A->factorerrortype; 2545 PetscFunctionReturn(0); 2546 } 2547 2548 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2549 { 2550 PetscErrorCode ierr; 2551 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2552 2553 PetscFunctionBegin; 2554 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2555 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2556 if (x->assembled) { 2557 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2558 } else { 2559 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2560 } 2561 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2562 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 
2567 { 2568 PetscFunctionBegin; 2569 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2570 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 /*@ 2575 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2576 2577 Collective on Mat 2578 2579 Input Parameters: 2580 + A - the matrix 2581 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2582 2583 Level: advanced 2584 2585 @*/ 2586 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2587 { 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2596 { 2597 PetscErrorCode ierr; 2598 PetscBool sc = PETSC_FALSE,flg; 2599 2600 PetscFunctionBegin; 2601 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2602 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2603 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2604 if (flg) { 2605 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2606 } 2607 ierr = PetscOptionsTail();CHKERRQ(ierr); 2608 PetscFunctionReturn(0); 2609 } 2610 2611 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2612 { 2613 PetscErrorCode ierr; 2614 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2615 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2616 2617 PetscFunctionBegin; 2618 if (!Y->preallocated) { 2619 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2620 } else if (!aij->nz) { 2621 PetscInt nonew = aij->nonew; 2622 ierr = 
MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2623 aij->nonew = nonew; 2624 } 2625 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2626 PetscFunctionReturn(0); 2627 } 2628 2629 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2630 { 2631 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2632 PetscErrorCode ierr; 2633 2634 PetscFunctionBegin; 2635 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2636 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2637 if (d) { 2638 PetscInt rstart; 2639 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2640 *d += rstart; 2641 2642 } 2643 PetscFunctionReturn(0); 2644 } 2645 2646 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2647 { 2648 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2649 PetscErrorCode ierr; 2650 2651 PetscFunctionBegin; 2652 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 /* -------------------------------------------------------------------*/ 2657 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2658 MatGetRow_MPIAIJ, 2659 MatRestoreRow_MPIAIJ, 2660 MatMult_MPIAIJ, 2661 /* 4*/ MatMultAdd_MPIAIJ, 2662 MatMultTranspose_MPIAIJ, 2663 MatMultTransposeAdd_MPIAIJ, 2664 NULL, 2665 NULL, 2666 NULL, 2667 /*10*/ NULL, 2668 NULL, 2669 NULL, 2670 MatSOR_MPIAIJ, 2671 MatTranspose_MPIAIJ, 2672 /*15*/ MatGetInfo_MPIAIJ, 2673 MatEqual_MPIAIJ, 2674 MatGetDiagonal_MPIAIJ, 2675 MatDiagonalScale_MPIAIJ, 2676 MatNorm_MPIAIJ, 2677 /*20*/ MatAssemblyBegin_MPIAIJ, 2678 MatAssemblyEnd_MPIAIJ, 2679 MatSetOption_MPIAIJ, 2680 MatZeroEntries_MPIAIJ, 2681 /*24*/ MatZeroRows_MPIAIJ, 2682 NULL, 2683 NULL, 2684 NULL, 2685 NULL, 2686 /*29*/ MatSetUp_MPIAIJ, 2687 NULL, 2688 NULL, 2689 MatGetDiagonalBlock_MPIAIJ, 2690 NULL, 2691 /*34*/ MatDuplicate_MPIAIJ, 2692 NULL, 2693 NULL, 2694 NULL, 2695 NULL, 2696 
/*39*/ MatAXPY_MPIAIJ, 2697 MatCreateSubMatrices_MPIAIJ, 2698 MatIncreaseOverlap_MPIAIJ, 2699 MatGetValues_MPIAIJ, 2700 MatCopy_MPIAIJ, 2701 /*44*/ MatGetRowMax_MPIAIJ, 2702 MatScale_MPIAIJ, 2703 MatShift_MPIAIJ, 2704 MatDiagonalSet_MPIAIJ, 2705 MatZeroRowsColumns_MPIAIJ, 2706 /*49*/ MatSetRandom_MPIAIJ, 2707 NULL, 2708 NULL, 2709 NULL, 2710 NULL, 2711 /*54*/ MatFDColoringCreate_MPIXAIJ, 2712 NULL, 2713 MatSetUnfactored_MPIAIJ, 2714 MatPermute_MPIAIJ, 2715 NULL, 2716 /*59*/ MatCreateSubMatrix_MPIAIJ, 2717 MatDestroy_MPIAIJ, 2718 MatView_MPIAIJ, 2719 NULL, 2720 NULL, 2721 /*64*/ NULL, 2722 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2723 NULL, 2724 NULL, 2725 NULL, 2726 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2727 MatGetRowMinAbs_MPIAIJ, 2728 NULL, 2729 NULL, 2730 NULL, 2731 NULL, 2732 /*75*/ MatFDColoringApply_AIJ, 2733 MatSetFromOptions_MPIAIJ, 2734 NULL, 2735 NULL, 2736 MatFindZeroDiagonals_MPIAIJ, 2737 /*80*/ NULL, 2738 NULL, 2739 NULL, 2740 /*83*/ MatLoad_MPIAIJ, 2741 MatIsSymmetric_MPIAIJ, 2742 NULL, 2743 NULL, 2744 NULL, 2745 NULL, 2746 /*89*/ NULL, 2747 NULL, 2748 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2749 NULL, 2750 NULL, 2751 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 MatBindToCPU_MPIAIJ, 2756 /*99*/ MatProductSetFromOptions_MPIAIJ, 2757 NULL, 2758 NULL, 2759 MatConjugate_MPIAIJ, 2760 NULL, 2761 /*104*/MatSetValuesRow_MPIAIJ, 2762 MatRealPart_MPIAIJ, 2763 MatImaginaryPart_MPIAIJ, 2764 NULL, 2765 NULL, 2766 /*109*/NULL, 2767 NULL, 2768 MatGetRowMin_MPIAIJ, 2769 NULL, 2770 MatMissingDiagonal_MPIAIJ, 2771 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2772 NULL, 2773 MatGetGhosts_MPIAIJ, 2774 NULL, 2775 NULL, 2776 /*119*/NULL, 2777 NULL, 2778 NULL, 2779 NULL, 2780 MatGetMultiProcBlock_MPIAIJ, 2781 /*124*/MatFindNonzeroRows_MPIAIJ, 2782 MatGetColumnNorms_MPIAIJ, 2783 MatInvertBlockDiagonal_MPIAIJ, 2784 MatInvertVariableBlockDiagonal_MPIAIJ, 2785 MatCreateSubMatricesMPI_MPIAIJ, 2786 /*129*/NULL, 2787 NULL, 2788 NULL, 2789 
MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2790 NULL, 2791 /*134*/NULL, 2792 NULL, 2793 NULL, 2794 NULL, 2795 NULL, 2796 /*139*/MatSetBlockSizes_MPIAIJ, 2797 NULL, 2798 NULL, 2799 MatFDColoringSetUp_MPIXAIJ, 2800 MatFindOffBlockDiagonalEntries_MPIAIJ, 2801 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2802 /*145*/NULL, 2803 NULL, 2804 NULL 2805 }; 2806 2807 /* ----------------------------------------------------------------------------------------*/ 2808 2809 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2810 { 2811 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2816 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2821 { 2822 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2823 PetscErrorCode ierr; 2824 2825 PetscFunctionBegin; 2826 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2827 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2832 { 2833 Mat_MPIAIJ *b; 2834 PetscErrorCode ierr; 2835 PetscMPIInt size; 2836 2837 PetscFunctionBegin; 2838 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2839 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2840 b = (Mat_MPIAIJ*)B->data; 2841 2842 #if defined(PETSC_USE_CTABLE) 2843 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2844 #else 2845 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2846 #endif 2847 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2848 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2849 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2850 2851 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2852 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2853 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2854 
ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2855 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2856 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2857 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2859 2860 if (!B->preallocated) { 2861 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2862 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2863 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2864 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2865 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2866 } 2867 2868 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2869 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2870 B->preallocated = PETSC_TRUE; 2871 B->was_assembled = PETSC_FALSE; 2872 B->assembled = PETSC_FALSE; 2873 PetscFunctionReturn(0); 2874 } 2875 2876 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2877 { 2878 Mat_MPIAIJ *b; 2879 PetscErrorCode ierr; 2880 2881 PetscFunctionBegin; 2882 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2883 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2884 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2885 b = (Mat_MPIAIJ*)B->data; 2886 2887 #if defined(PETSC_USE_CTABLE) 2888 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2889 #else 2890 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2891 #endif 2892 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2893 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2894 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2895 2896 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2897 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2898 B->preallocated = PETSC_TRUE; 2899 B->was_assembled = PETSC_FALSE; 2900 B->assembled = PETSC_FALSE; 2901 PetscFunctionReturn(0); 2902 } 2903 2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat 
matin,MatDuplicateOption cpvalues,Mat *newmat) 2905 { 2906 Mat mat; 2907 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2908 PetscErrorCode ierr; 2909 2910 PetscFunctionBegin; 2911 *newmat = NULL; 2912 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2913 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2914 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2915 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2916 a = (Mat_MPIAIJ*)mat->data; 2917 2918 mat->factortype = matin->factortype; 2919 mat->assembled = matin->assembled; 2920 mat->insertmode = NOT_SET_VALUES; 2921 mat->preallocated = matin->preallocated; 2922 2923 a->size = oldmat->size; 2924 a->rank = oldmat->rank; 2925 a->donotstash = oldmat->donotstash; 2926 a->roworiented = oldmat->roworiented; 2927 a->rowindices = NULL; 2928 a->rowvalues = NULL; 2929 a->getrowactive = PETSC_FALSE; 2930 2931 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2932 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2933 2934 if (oldmat->colmap) { 2935 #if defined(PETSC_USE_CTABLE) 2936 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2937 #else 2938 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2939 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2940 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2941 #endif 2942 } else a->colmap = NULL; 2943 if (oldmat->garray) { 2944 PetscInt len; 2945 len = oldmat->B->cmap->n; 2946 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2947 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2948 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2949 } else a->garray = NULL; 2950 2951 /* It may happen MatDuplicate is called with a non-assembled matrix 2952 In fact, MatDuplicate 
only requires the matrix to be preallocated 2953 This may happen inside a DMCreateMatrix_Shell */ 2954 if (oldmat->lvec) { 2955 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2956 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2957 } 2958 if (oldmat->Mvctx) { 2959 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2960 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2961 } 2962 if (oldmat->Mvctx_mpi1) { 2963 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2964 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2965 } 2966 2967 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2968 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2969 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2970 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2971 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2972 *newmat = mat; 2973 PetscFunctionReturn(0); 2974 } 2975 2976 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2977 { 2978 PetscBool isbinary, ishdf5; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2983 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2984 /* force binary viewer to load .info file if it has not yet done so */ 2985 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2986 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2987 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2988 if (isbinary) { 2989 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2990 } else if (ishdf5) { 2991 #if defined(PETSC_HAVE_HDF5) 2992 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2993 #else 2994 
SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2995 #endif 2996 } else { 2997 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2998 } 2999 PetscFunctionReturn(0); 3000 } 3001 3002 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3003 { 3004 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3005 PetscInt *rowidxs,*colidxs; 3006 PetscScalar *matvals; 3007 PetscErrorCode ierr; 3008 3009 PetscFunctionBegin; 3010 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3011 3012 /* read in matrix header */ 3013 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3014 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3015 M = header[1]; N = header[2]; nz = header[3]; 3016 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3017 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3018 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3019 3020 /* set block sizes from the viewer's .info file */ 3021 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3022 /* set global sizes if not set already */ 3023 if (mat->rmap->N < 0) mat->rmap->N = M; 3024 if (mat->cmap->N < 0) mat->cmap->N = N; 3025 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3026 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3027 3028 /* check if the matrix sizes are correct */ 3029 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3030 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in 
file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3031 3032 /* read in row lengths and build row indices */ 3033 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 3034 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3035 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3036 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3037 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3038 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3039 /* read in column indices and matrix values */ 3040 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3041 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3042 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3043 /* store matrix indices and values */ 3044 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 3045 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3046 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3047 PetscFunctionReturn(0); 3048 } 3049 3050 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3051 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3052 { 3053 PetscErrorCode ierr; 3054 IS iscol_local; 3055 PetscBool isstride; 3056 PetscMPIInt lisstride=0,gisstride; 3057 3058 PetscFunctionBegin; 3059 /* check if we are grabbing all columns*/ 3060 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3061 3062 if (isstride) { 3063 PetscInt start,len,mstart,mlen; 3064 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3065 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3066 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3067 if (mstart == start && mlen-mstart == len) lisstride = 1; 3068 } 3069 3070 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3071 if (gisstride) { 3072 PetscInt N; 3073 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3074 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3075 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3076 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3077 } else { 3078 PetscInt cbs; 3079 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3080 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3081 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3082 } 3083 3084 *isseq = iscol_local; 3085 PetscFunctionReturn(0); 3086 } 3087 3088 /* 3089 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3090 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3091 3092 Input Parameters: 3093 mat - matrix 3094 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3095 i.e., mat->rstart <= isrow[i] < mat->rend 3096 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3097 i.e., mat->cstart <= iscol[i] < mat->cend 3098 Output Parameter: 3099 isrow_d,iscol_d - sequential row and column 
index sets for retrieving mat->A 3100 iscol_o - sequential column index set for retrieving mat->B 3101 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3102 */ 3103 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3104 { 3105 PetscErrorCode ierr; 3106 Vec x,cmap; 3107 const PetscInt *is_idx; 3108 PetscScalar *xarray,*cmaparray; 3109 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3110 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3111 Mat B=a->B; 3112 Vec lvec=a->lvec,lcmap; 3113 PetscInt i,cstart,cend,Bn=B->cmap->N; 3114 MPI_Comm comm; 3115 VecScatter Mvctx=a->Mvctx; 3116 3117 PetscFunctionBegin; 3118 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3119 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3120 3121 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3122 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3123 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3124 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3125 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3126 3127 /* Get start indices */ 3128 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3129 isstart -= ncols; 3130 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3131 3132 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3133 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3134 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3135 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3136 for (i=0; i<ncols; i++) { 3137 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3138 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3139 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3140 } 3141 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3142 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3143 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3144 3145 /* Get iscol_d */ 3146 
ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3147 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3148 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3149 3150 /* Get isrow_d */ 3151 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3152 rstart = mat->rmap->rstart; 3153 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3154 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3155 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3156 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3157 3158 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3159 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3160 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3161 3162 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3163 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3164 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3165 3166 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3167 3168 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3169 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3170 3171 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3172 /* off-process column indices */ 3173 count = 0; 3174 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3175 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3176 3177 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3178 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3179 for (i=0; i<Bn; i++) { 3180 if (PetscRealPart(xarray[i]) > -1.0) { 3181 idx[count] = i; /* local column index in off-diagonal part B */ 3182 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3183 count++; 3184 } 3185 } 3186 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3187 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3188 3189 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3190 /* cannot ensure iscol_o has same blocksize as iscol! */ 3191 3192 ierr = PetscFree(idx);CHKERRQ(ierr); 3193 *garray = cmap1; 3194 3195 ierr = VecDestroy(&x);CHKERRQ(ierr); 3196 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3197 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 3201 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3202 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3203 { 3204 PetscErrorCode ierr; 3205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3206 Mat M = NULL; 3207 MPI_Comm comm; 3208 IS iscol_d,isrow_d,iscol_o; 3209 Mat Asub = NULL,Bsub = NULL; 3210 PetscInt n; 3211 3212 PetscFunctionBegin; 3213 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3214 3215 if (call == MAT_REUSE_MATRIX) { 3216 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3217 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3218 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3219 3220 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3221 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3222 3223 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3224 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3225 3226 /* Update diagonal and off-diagonal portions of submat */ 3227 asub = (Mat_MPIAIJ*)(*submat)->data; 3228 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3229 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3230 if (n) { 
3231 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3232 } 3233 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3234 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3235 3236 } else { /* call == MAT_INITIAL_MATRIX) */ 3237 const PetscInt *garray; 3238 PetscInt BsubN; 3239 3240 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3241 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3242 3243 /* Create local submatrices Asub and Bsub */ 3244 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3245 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3246 3247 /* Create submatrix M */ 3248 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3249 3250 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3251 asub = (Mat_MPIAIJ*)M->data; 3252 3253 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3254 n = asub->B->cmap->N; 3255 if (BsubN > n) { 3256 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3257 const PetscInt *idx; 3258 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3259 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3260 3261 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3262 j = 0; 3263 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3264 for (i=0; i<n; i++) { 3265 if (j >= BsubN) break; 3266 while (subgarray[i] > garray[j]) j++; 3267 3268 if (subgarray[i] == garray[j]) { 3269 idx_new[i] = idx[j++]; 3270 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3271 } 3272 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3273 3274 
ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3275 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3276 3277 } else if (BsubN < n) { 3278 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3279 } 3280 3281 ierr = PetscFree(garray);CHKERRQ(ierr); 3282 *submat = M; 3283 3284 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3285 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3286 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3287 3288 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3289 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3290 3291 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3292 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3293 } 3294 PetscFunctionReturn(0); 3295 } 3296 3297 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3298 { 3299 PetscErrorCode ierr; 3300 IS iscol_local=NULL,isrow_d; 3301 PetscInt csize; 3302 PetscInt n,i,j,start,end; 3303 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3304 MPI_Comm comm; 3305 3306 PetscFunctionBegin; 3307 /* If isrow has same processor distribution as mat, 3308 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3309 if (call == MAT_REUSE_MATRIX) { 3310 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3311 if (isrow_d) { 3312 sameRowDist = PETSC_TRUE; 3313 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3314 } else { 3315 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3316 if (iscol_local) { 3317 sameRowDist = PETSC_TRUE; 3318 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3319 } 3320 } 3321 } else { 3322 /* Check if isrow has same processor distribution as mat */ 3323 
sameDist[0] = PETSC_FALSE; 3324 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3325 if (!n) { 3326 sameDist[0] = PETSC_TRUE; 3327 } else { 3328 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3329 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3330 if (i >= start && j < end) { 3331 sameDist[0] = PETSC_TRUE; 3332 } 3333 } 3334 3335 /* Check if iscol has same processor distribution as mat */ 3336 sameDist[1] = PETSC_FALSE; 3337 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3338 if (!n) { 3339 sameDist[1] = PETSC_TRUE; 3340 } else { 3341 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3342 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3343 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3344 } 3345 3346 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3347 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3348 sameRowDist = tsameDist[0]; 3349 } 3350 3351 if (sameRowDist) { 3352 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3353 /* isrow and iscol have same processor distribution as mat */ 3354 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3355 PetscFunctionReturn(0); 3356 } else { /* sameRowDist */ 3357 /* isrow has same processor distribution as mat */ 3358 if (call == MAT_INITIAL_MATRIX) { 3359 PetscBool sorted; 3360 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3361 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3362 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3363 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3364 3365 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3366 if (sorted) { 3367 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3368 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3369 PetscFunctionReturn(0); 3370 } 3371 } else { /* call == MAT_REUSE_MATRIX */ 3372 IS iscol_sub; 3373 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3374 if (iscol_sub) { 3375 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3376 PetscFunctionReturn(0); 3377 } 3378 } 3379 } 3380 } 3381 3382 /* General case: iscol -> iscol_local which has global size of iscol */ 3383 if (call == MAT_REUSE_MATRIX) { 3384 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3385 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3386 } else { 3387 if (!iscol_local) { 3388 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3389 } 3390 } 3391 3392 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3393 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3394 3395 if (call == MAT_INITIAL_MATRIX) { 3396 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3397 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3398 } 3399 PetscFunctionReturn(0); 3400 } 3401 3402 /*@C 3403 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3404 and "off-diagonal" part of the matrix in CSR format. 3405 3406 Collective 3407 3408 Input Parameters: 3409 + comm - MPI communicator 3410 . A - "diagonal" portion of matrix 3411 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3412 - garray - global index of B columns 3413 3414 Output Parameter: 3415 . 
mat - the matrix, with input A as its local diagonal matrix 3416 Level: advanced 3417 3418 Notes: 3419 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3420 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3421 3422 .seealso: MatCreateMPIAIJWithSplitArrays() 3423 @*/ 3424 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3425 { 3426 PetscErrorCode ierr; 3427 Mat_MPIAIJ *maij; 3428 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3429 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3430 PetscScalar *oa=b->a; 3431 Mat Bnew; 3432 PetscInt m,n,N; 3433 3434 PetscFunctionBegin; 3435 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3436 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3437 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3438 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3439 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3440 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3441 3442 /* Get global columns of mat */ 3443 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3444 3445 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3446 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3447 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3448 maij = (Mat_MPIAIJ*)(*mat)->data; 3449 3450 (*mat)->preallocated = PETSC_TRUE; 3451 3452 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3453 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3454 3455 /* Set A as diagonal portion of *mat */ 3456 maij->A = A; 3457 3458 nz = oi[m]; 3459 for (i=0; i<nz; i++) { 3460 col = oj[i]; 3461 oj[i] 
= garray[col]; 3462 } 3463 3464 /* Set Bnew as off-diagonal portion of *mat */ 3465 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3466 bnew = (Mat_SeqAIJ*)Bnew->data; 3467 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3468 maij->B = Bnew; 3469 3470 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3471 3472 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3473 b->free_a = PETSC_FALSE; 3474 b->free_ij = PETSC_FALSE; 3475 ierr = MatDestroy(&B);CHKERRQ(ierr); 3476 3477 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3478 bnew->free_a = PETSC_TRUE; 3479 bnew->free_ij = PETSC_TRUE; 3480 3481 /* condense columns of maij->B */ 3482 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3483 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3484 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3485 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3486 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3487 PetscFunctionReturn(0); 3488 } 3489 3490 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3491 3492 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3493 { 3494 PetscErrorCode ierr; 3495 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3496 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3497 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3498 Mat M,Msub,B=a->B; 3499 MatScalar *aa; 3500 Mat_SeqAIJ *aij; 3501 PetscInt *garray = a->garray,*colsub,Ncols; 3502 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3503 IS iscol_sub,iscmap; 3504 const PetscInt *is_idx,*cmap; 3505 PetscBool allcolumns=PETSC_FALSE; 3506 MPI_Comm comm; 3507 3508 
PetscFunctionBegin; 3509 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3510 3511 if (call == MAT_REUSE_MATRIX) { 3512 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3513 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3514 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3515 3516 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3517 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3518 3519 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3520 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3521 3522 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3523 3524 } else { /* call == MAT_INITIAL_MATRIX) */ 3525 PetscBool flg; 3526 3527 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3528 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3529 3530 /* (1) iscol -> nonscalable iscol_local */ 3531 /* Check for special case: each processor gets entire matrix columns */ 3532 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3533 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3534 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3535 if (allcolumns) { 3536 iscol_sub = iscol_local; 3537 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3538 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3539 3540 } else { 3541 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3542 PetscInt *idx,*cmap1,k; 3543 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3544 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3545 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3546 count = 0; 3547 k = 0; 3548 for (i=0; i<Ncols; i++) { 3549 j = is_idx[i]; 3550 if (j >= cstart && j < cend) { 3551 /* diagonal part of mat */ 3552 idx[count] = j; 3553 cmap1[count++] = i; /* column index in submat */ 3554 } else if (Bn) { 3555 /* off-diagonal part of mat */ 3556 if (j == garray[k]) { 3557 idx[count] = j; 3558 cmap1[count++] = i; /* column index in submat */ 3559 } else if (j > garray[k]) { 3560 while (j > garray[k] && k < Bn-1) k++; 3561 if (j == garray[k]) { 3562 idx[count] = j; 3563 cmap1[count++] = i; /* column index in submat */ 3564 } 3565 } 3566 } 3567 } 3568 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3569 3570 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3571 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3572 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3573 3574 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3575 } 3576 3577 /* (3) Create sequential Msub */ 3578 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3579 } 3580 3581 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3582 aij = (Mat_SeqAIJ*)(Msub)->data; 3583 ii = aij->i; 3584 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3585 3586 /* 3587 m - number of local rows 3588 Ncols - number of columns (same on all processors) 3589 rstart - first row in new global matrix generated 3590 */ 3591 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3592 3593 if (call == MAT_INITIAL_MATRIX) { 3594 /* (4) Create parallel newmat */ 3595 PetscMPIInt rank,size; 3596 PetscInt csize; 3597 3598 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3600 3601 /* 3602 Determine the number of non-zeros in the diagonal and off-diagonal 3603 portions of the matrix in order to do correct preallocation 3604 */ 3605 3606 /* first get start and end of "diagonal" columns */ 3607 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3608 if (csize == PETSC_DECIDE) { 3609 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3610 if (mglobal == Ncols) { /* square matrix */ 3611 nlocal = m; 3612 } else { 3613 nlocal = Ncols/size + ((Ncols % size) > rank); 3614 } 3615 } else { 3616 nlocal = csize; 3617 } 3618 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3619 rstart = rend - nlocal; 3620 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3621 3622 /* next, compute all the lengths */ 3623 jj = aij->j; 3624 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3625 olens = dlens + m; 3626 for (i=0; i<m; i++) { 3627 jend = ii[i+1] - ii[i]; 3628 olen = 0; 3629 dlen = 0; 3630 for (j=0; j<jend; j++) { 3631 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3632 else dlen++; 3633 jj++; 3634 } 3635 olens[i] = olen; 3636 dlens[i] = dlen; 3637 } 3638 3639 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3640 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3641 3642 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3643 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3644 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3645 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3646 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3647 ierr = PetscFree(dlens);CHKERRQ(ierr); 3648 3649 } else { /* call == MAT_REUSE_MATRIX */ 3650 M = *newmat; 3651 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3652 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3653 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3654 /* 3655 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3656 rather than the slower MatSetValues(). 3657 */ 3658 M->was_assembled = PETSC_TRUE; 3659 M->assembled = PETSC_FALSE; 3660 } 3661 3662 /* (5) Set values of Msub to *newmat */ 3663 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3664 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3665 3666 jj = aij->j; 3667 aa = aij->a; 3668 for (i=0; i<m; i++) { 3669 row = rstart + i; 3670 nz = ii[i+1] - ii[i]; 3671 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3672 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3673 jj += nz; aa += nz; 3674 } 3675 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3676 3677 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3678 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3679 3680 ierr = PetscFree(colsub);CHKERRQ(ierr); 3681 3682 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3683 if (call == MAT_INITIAL_MATRIX) { 3684 *newmat = M; 3685 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3686 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3687 3688 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3689 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3690 3691 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3692 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3693 3694 if (iscol_local) { 3695 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3696 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3697 } 3698 } 3699 PetscFunctionReturn(0); 3700 } 3701 3702 /* 3703 Not great since it makes two copies of the submatrix, first an SeqAIJ 3704 in local and then by concatenating the local matrices the end result. 
3705 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3706 3707 Note: This requires a sequential iscol with all indices. 3708 */ 3709 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3710 { 3711 PetscErrorCode ierr; 3712 PetscMPIInt rank,size; 3713 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3714 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3715 Mat M,Mreuse; 3716 MatScalar *aa,*vwork; 3717 MPI_Comm comm; 3718 Mat_SeqAIJ *aij; 3719 PetscBool colflag,allcolumns=PETSC_FALSE; 3720 3721 PetscFunctionBegin; 3722 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3723 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3725 3726 /* Check for special case: each processor gets entire matrix columns */ 3727 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3728 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3729 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3730 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3731 3732 if (call == MAT_REUSE_MATRIX) { 3733 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3734 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3735 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3736 } else { 3737 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3738 } 3739 3740 /* 3741 m - number of local rows 3742 n - number of columns (same on all processors) 3743 rstart - first row in new global matrix generated 3744 */ 3745 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3746 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3747 if 
(call == MAT_INITIAL_MATRIX) { 3748 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3749 ii = aij->i; 3750 jj = aij->j; 3751 3752 /* 3753 Determine the number of non-zeros in the diagonal and off-diagonal 3754 portions of the matrix in order to do correct preallocation 3755 */ 3756 3757 /* first get start and end of "diagonal" columns */ 3758 if (csize == PETSC_DECIDE) { 3759 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3760 if (mglobal == n) { /* square matrix */ 3761 nlocal = m; 3762 } else { 3763 nlocal = n/size + ((n % size) > rank); 3764 } 3765 } else { 3766 nlocal = csize; 3767 } 3768 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3769 rstart = rend - nlocal; 3770 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3771 3772 /* next, compute all the lengths */ 3773 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3774 olens = dlens + m; 3775 for (i=0; i<m; i++) { 3776 jend = ii[i+1] - ii[i]; 3777 olen = 0; 3778 dlen = 0; 3779 for (j=0; j<jend; j++) { 3780 if (*jj < rstart || *jj >= rend) olen++; 3781 else dlen++; 3782 jj++; 3783 } 3784 olens[i] = olen; 3785 dlens[i] = dlen; 3786 } 3787 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3788 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3789 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3790 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3791 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3792 ierr = PetscFree(dlens);CHKERRQ(ierr); 3793 } else { 3794 PetscInt ml,nl; 3795 3796 M = *newmat; 3797 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3798 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3799 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3800 /* 3801 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3802 rather than the slower MatSetValues(). 
3803 */ 3804 M->was_assembled = PETSC_TRUE; 3805 M->assembled = PETSC_FALSE; 3806 } 3807 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3808 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3809 ii = aij->i; 3810 jj = aij->j; 3811 aa = aij->a; 3812 for (i=0; i<m; i++) { 3813 row = rstart + i; 3814 nz = ii[i+1] - ii[i]; 3815 cwork = jj; jj += nz; 3816 vwork = aa; aa += nz; 3817 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3818 } 3819 3820 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3821 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 *newmat = M; 3823 3824 /* save submatrix used in processor for next request */ 3825 if (call == MAT_INITIAL_MATRIX) { 3826 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3827 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3828 } 3829 PetscFunctionReturn(0); 3830 } 3831 3832 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3833 { 3834 PetscInt m,cstart, cend,j,nnz,i,d; 3835 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3836 const PetscInt *JJ; 3837 PetscErrorCode ierr; 3838 PetscBool nooffprocentries; 3839 3840 PetscFunctionBegin; 3841 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3842 3843 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3844 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3845 m = B->rmap->n; 3846 cstart = B->cmap->rstart; 3847 cend = B->cmap->rend; 3848 rstart = B->rmap->rstart; 3849 3850 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3851 3852 if (PetscDefined(USE_DEBUG)) { 3853 for (i=0; i<m; i++) { 3854 nnz = Ii[i+1]- Ii[i]; 3855 JJ = J + Ii[i]; 3856 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3857 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column 
index",i,JJ[0]); 3858 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3859 } 3860 } 3861 3862 for (i=0; i<m; i++) { 3863 nnz = Ii[i+1]- Ii[i]; 3864 JJ = J + Ii[i]; 3865 nnz_max = PetscMax(nnz_max,nnz); 3866 d = 0; 3867 for (j=0; j<nnz; j++) { 3868 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3869 } 3870 d_nnz[i] = d; 3871 o_nnz[i] = nnz - d; 3872 } 3873 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3874 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3875 3876 for (i=0; i<m; i++) { 3877 ii = i + rstart; 3878 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3879 } 3880 nooffprocentries = B->nooffprocentries; 3881 B->nooffprocentries = PETSC_TRUE; 3882 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 B->nooffprocentries = nooffprocentries; 3885 3886 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3887 PetscFunctionReturn(0); 3888 } 3889 3890 /*@ 3891 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3892 (the default parallel PETSc format). 3893 3894 Collective 3895 3896 Input Parameters: 3897 + B - the matrix 3898 . i - the indices into j for the start of each local row (starts with zero) 3899 . j - the column indices for each local row (starts with zero) 3900 - v - optional values in the matrix 3901 3902 Level: developer 3903 3904 Notes: 3905 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3906 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3907 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3908 3909 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3910 3911 The format which is used for the sparse matrix input, is equivalent to a 3912 row-major ordering.. i.e for the following matrix, the input data expected is 3913 as shown 3914 3915 $ 1 0 0 3916 $ 2 0 3 P0 3917 $ ------- 3918 $ 4 5 6 P1 3919 $ 3920 $ Process0 [P0]: rows_owned=[0,1] 3921 $ i = {0,1,3} [size = nrow+1 = 2+1] 3922 $ j = {0,0,2} [size = 3] 3923 $ v = {1,2,3} [size = 3] 3924 $ 3925 $ Process1 [P1]: rows_owned=[2] 3926 $ i = {0,3} [size = nrow+1 = 1+1] 3927 $ j = {0,1,2} [size = 3] 3928 $ v = {4,5,6} [size = 3] 3929 3930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3931 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3932 @*/ 3933 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3934 { 3935 PetscErrorCode ierr; 3936 3937 PetscFunctionBegin; 3938 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3939 PetscFunctionReturn(0); 3940 } 3941 3942 /*@C 3943 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3944 (the default parallel PETSc format). For good matrix assembly performance 3945 the user should preallocate the matrix storage by setting the parameters 3946 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3947 performance can be increased by more than a factor of 50. 3948 3949 Collective 3950 3951 Input Parameters: 3952 + B - the matrix 3953 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3954 (same value is used for all local rows) 3955 . 
d_nnz - array containing the number of nonzeros in the various rows of the 3956 DIAGONAL portion of the local submatrix (possibly different for each row) 3957 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3958 The size of this array is equal to the number of local rows, i.e 'm'. 3959 For matrices that will be factored, you must leave room for (and set) 3960 the diagonal entry even if it is zero. 3961 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3962 submatrix (same value is used for all local rows). 3963 - o_nnz - array containing the number of nonzeros in the various rows of the 3964 OFF-DIAGONAL portion of the local submatrix (possibly different for 3965 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3966 structure. The size of this array is equal to the number 3967 of local rows, i.e 'm'. 3968 3969 If the *_nnz parameter is given then the *_nz parameter is ignored 3970 3971 The AIJ format (also called the Yale sparse matrix format or 3972 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3973 storage. The stored row and column indices begin with zero. 3974 See Users-Manual: ch_mat for details. 3975 3976 The parallel matrix is partitioned such that the first m0 rows belong to 3977 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3978 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3979 3980 The DIAGONAL portion of the local submatrix of a processor can be defined 3981 as the submatrix which is obtained by extracting the part corresponding to 3982 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3983 first row that belongs to the processor, r2 is the last row belonging to 3984 this processor, and c1-c2 is the range of indices of the local part of a 3985 vector suitable for applying the matrix to. This is an mxn matrix. 
In the 3986 common case of a square matrix, the row and column ranges are the same and 3987 the DIAGONAL part is also square. The remaining portion of the local 3988 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3989 3990 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3991 3992 You can call MatGetInfo() to get information on how effective the preallocation was; 3993 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3994 You can also run with the option -info and look for messages with the string 3995 malloc in them to see if additional memory allocation was needed. 3996 3997 Example usage: 3998 3999 Consider the following 8x8 matrix with 34 non-zero values, that is 4000 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4001 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4002 as follows: 4003 4004 .vb 4005 1 2 0 | 0 3 0 | 0 4 4006 Proc0 0 5 6 | 7 0 0 | 8 0 4007 9 0 10 | 11 0 0 | 12 0 4008 ------------------------------------- 4009 13 0 14 | 15 16 17 | 0 0 4010 Proc1 0 18 0 | 19 20 21 | 0 0 4011 0 0 0 | 22 23 0 | 24 0 4012 ------------------------------------- 4013 Proc2 25 26 27 | 0 0 28 | 29 0 4014 30 0 0 | 31 32 33 | 0 34 4015 .ve 4016 4017 This can be represented as a collection of submatrices as: 4018 4019 .vb 4020 A B C 4021 D E F 4022 G H I 4023 .ve 4024 4025 Where the submatrices A,B,C are owned by proc0, D,E,F are 4026 owned by proc1, G,H,I are owned by proc2. 4027 4028 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4029 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4030 The 'M','N' parameters are 8,8, and have the same values on all procs. 4031 4032 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4033 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4034 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4035 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4036 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4037 matrix, and [DF] as another SeqAIJ matrix. 4038 4039 When d_nz, o_nz parameters are specified, d_nz storage elements are 4040 allocated for every row of the local diagonal submatrix, and o_nz 4041 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4042 One way to choose d_nz and o_nz is to use the max nonzeros per local 4043 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4044 In this case, the values of d_nz,o_nz are: 4045 .vb 4046 proc0 : d_nz = 2, o_nz = 2 4047 proc1 : d_nz = 3, o_nz = 2 4048 proc2 : d_nz = 1, o_nz = 4 4049 .ve 4050 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4051 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4052 for proc2. i.e we are using 12+15+10=37 storage locations to store 4053 34 values. 4054 4055 When d_nnz, o_nnz parameters are specified, the storage is specified 4056 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4057 In the above case the values for d_nnz,o_nnz are: 4058 .vb 4059 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4060 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4061 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4062 .ve 4063 Here the space allocated is the sum of all the above values i.e 34, and 4064 hence pre-allocation is perfect. 
4065 4066 Level: intermediate 4067 4068 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4069 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4070 @*/ 4071 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4072 { 4073 PetscErrorCode ierr; 4074 4075 PetscFunctionBegin; 4076 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4077 PetscValidType(B,1); 4078 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4079 PetscFunctionReturn(0); 4080 } 4081 4082 /*@ 4083 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4084 CSR format for the local rows. 4085 4086 Collective 4087 4088 Input Parameters: 4089 + comm - MPI communicator 4090 . m - number of local rows (Cannot be PETSC_DECIDE) 4091 . n - This value should be the same as the local size used in creating the 4092 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4093 calculated if N is given) For square matrices n is almost always m. 4094 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4095 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4096 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4097 . j - column indices 4098 - a - matrix values 4099 4100 Output Parameter: 4101 . mat - the matrix 4102 4103 Level: intermediate 4104 4105 Notes: 4106 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4107 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4108 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4109 4110 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4111 4112 The format which is used for the sparse matrix input, is equivalent to a 4113 row-major ordering.. i.e for the following matrix, the input data expected is 4114 as shown 4115 4116 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4117 4118 $ 1 0 0 4119 $ 2 0 3 P0 4120 $ ------- 4121 $ 4 5 6 P1 4122 $ 4123 $ Process0 [P0]: rows_owned=[0,1] 4124 $ i = {0,1,3} [size = nrow+1 = 2+1] 4125 $ j = {0,0,2} [size = 3] 4126 $ v = {1,2,3} [size = 3] 4127 $ 4128 $ Process1 [P1]: rows_owned=[2] 4129 $ i = {0,3} [size = nrow+1 = 1+1] 4130 $ j = {0,1,2} [size = 3] 4131 $ v = {4,5,6} [size = 3] 4132 4133 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4134 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4135 @*/ 4136 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4137 { 4138 PetscErrorCode ierr; 4139 4140 PetscFunctionBegin; 4141 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4142 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4143 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4144 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4145 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4146 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4147 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4148 PetscFunctionReturn(0); 4149 } 4150 4151 /*@ 4152 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4153 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical 4154 4155 Collective 4156 4157 Input Parameters: 4158 + mat - the matrix 4159 . m - number of local rows (Cannot be PETSC_DECIDE) 4160 . n - This value should be the same as the local size used in creating the 4161 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4162 calculated if N is given) For square matrices n is almost always m. 4163 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4164 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4165 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4166 . J - column indices 4167 - v - matrix values 4168 4169 Level: intermediate 4170 4171 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4172 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4173 @*/ 4174 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4175 { 4176 PetscErrorCode ierr; 4177 PetscInt cstart,nnz,i,j; 4178 PetscInt *ld; 4179 PetscBool nooffprocentries; 4180 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4181 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4182 PetscScalar *ad = Ad->a, *ao = Ao->a; 4183 const PetscInt *Adi = Ad->i; 4184 PetscInt ldi,Iii,md; 4185 4186 PetscFunctionBegin; 4187 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4188 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4189 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4190 if (n != mat->cmap->n) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4191 4192 cstart = mat->cmap->rstart; 4193 if (!Aij->ld) { 4194 /* count number of entries below block diagonal */ 4195 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4196 Aij->ld = ld; 4197 for (i=0; i<m; i++) { 4198 nnz = Ii[i+1]- Ii[i]; 4199 j = 0; 4200 while (J[j] < cstart && j < nnz) {j++;} 4201 J += nnz; 4202 ld[i] = j; 4203 } 4204 } else { 4205 ld = Aij->ld; 4206 } 4207 4208 for (i=0; i<m; i++) { 4209 nnz = Ii[i+1]- Ii[i]; 4210 Iii = Ii[i]; 4211 ldi = ld[i]; 4212 md = Adi[i+1]-Adi[i]; 4213 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4214 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4215 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4216 ad += md; 4217 ao += nnz - md; 4218 } 4219 nooffprocentries = mat->nooffprocentries; 4220 mat->nooffprocentries = PETSC_TRUE; 4221 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4222 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4223 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4224 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4225 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4226 mat->nooffprocentries = nooffprocentries; 4227 PetscFunctionReturn(0); 4228 } 4229 4230 /*@C 4231 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4232 (the default parallel PETSc format). For good matrix assembly performance 4233 the user should preallocate the matrix storage by setting the parameters 4234 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4235 performance can be increased by more than a factor of 50. 4236 4237 Collective 4238 4239 Input Parameters: 4240 + comm - MPI communicator 4241 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4242 This value should be the same as the local size used in creating the 4243 y vector for the matrix-vector product y = Ax. 4244 . n - This value should be the same as the local size used in creating the 4245 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4246 calculated if N is given) For square matrices n is almost always m. 4247 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4248 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4249 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4250 (same value is used for all local rows) 4251 . d_nnz - array containing the number of nonzeros in the various rows of the 4252 DIAGONAL portion of the local submatrix (possibly different for each row) 4253 or NULL, if d_nz is used to specify the nonzero structure. 4254 The size of this array is equal to the number of local rows, i.e 'm'. 4255 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4256 submatrix (same value is used for all local rows). 4257 - o_nnz - array containing the number of nonzeros in the various rows of the 4258 OFF-DIAGONAL portion of the local submatrix (possibly different for 4259 each row) or NULL, if o_nz is used to specify the nonzero 4260 structure. The size of this array is equal to the number 4261 of local rows, i.e 'm'. 4262 4263 Output Parameter: 4264 . A - the matrix 4265 4266 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4267 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)



   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4399 4400 Level: intermediate 4401 4402 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4403 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4404 @*/ 4405 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4406 { 4407 PetscErrorCode ierr; 4408 PetscMPIInt size; 4409 4410 PetscFunctionBegin; 4411 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4412 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4413 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4414 if (size > 1) { 4415 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4416 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4417 } else { 4418 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4419 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4420 } 4421 PetscFunctionReturn(0); 4422 } 4423 4424 /*@C 4425 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4426 4427 Not collective 4428 4429 Input Parameter: 4430 . A - The MPIAIJ matrix 4431 4432 Output Parameters: 4433 + Ad - The local diagonal block as a SeqAIJ matrix 4434 . Ao - The local off-diagonal block as a SeqAIJ matrix 4435 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4436 4437 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4438 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4439 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4440 local column numbers to global column numbers in the original matrix. 
4441 4442 Level: intermediate 4443 4444 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4445 @*/ 4446 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4447 { 4448 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4449 PetscBool flg; 4450 PetscErrorCode ierr; 4451 4452 PetscFunctionBegin; 4453 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4454 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4455 if (Ad) *Ad = a->A; 4456 if (Ao) *Ao = a->B; 4457 if (colmap) *colmap = a->garray; 4458 PetscFunctionReturn(0); 4459 } 4460 4461 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4462 { 4463 PetscErrorCode ierr; 4464 PetscInt m,N,i,rstart,nnz,Ii; 4465 PetscInt *indx; 4466 PetscScalar *values; 4467 4468 PetscFunctionBegin; 4469 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4470 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4471 PetscInt *dnz,*onz,sum,bs,cbs; 4472 4473 if (n == PETSC_DECIDE) { 4474 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4475 } 4476 /* Check sum(n) = N */ 4477 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4478 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4479 4480 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4481 rstart -= m; 4482 4483 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4484 for (i=0; i<m; i++) { 4485 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4486 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4487 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4488 } 4489 4490 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4491 ierr = 
MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4492 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4493 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4494 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4495 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4496 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4497 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4498 } 4499 4500 /* numeric phase */ 4501 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4502 for (i=0; i<m; i++) { 4503 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4504 Ii = i + rstart; 4505 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4506 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4507 } 4508 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4509 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4514 { 4515 PetscErrorCode ierr; 4516 PetscMPIInt rank; 4517 PetscInt m,N,i,rstart,nnz; 4518 size_t len; 4519 const PetscInt *indx; 4520 PetscViewer out; 4521 char *name; 4522 Mat B; 4523 const PetscScalar *values; 4524 4525 PetscFunctionBegin; 4526 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4527 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4528 /* Should this be the type of the diagonal block of A? 
*/ 4529 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4530 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4531 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4532 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4533 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4534 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4535 for (i=0; i<m; i++) { 4536 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4537 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4538 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4539 } 4540 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4541 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4542 4543 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4544 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4545 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4546 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4547 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4548 ierr = PetscFree(name);CHKERRQ(ierr); 4549 ierr = MatView(B,out);CHKERRQ(ierr); 4550 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4551 ierr = MatDestroy(&B);CHKERRQ(ierr); 4552 PetscFunctionReturn(0); 4553 } 4554 4555 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4556 { 4557 PetscErrorCode ierr; 4558 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4559 4560 PetscFunctionBegin; 4561 if (!merge) PetscFunctionReturn(0); 4562 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4565 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4566 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4568 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4570 ierr = 
PetscFree(merge->buf_rj);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4573 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4574 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4575 ierr = PetscFree(merge);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 #include <../src/mat/utils/freespace.h> 4580 #include <petscbt.h> 4581 4582 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4583 { 4584 PetscErrorCode ierr; 4585 MPI_Comm comm; 4586 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4587 PetscMPIInt size,rank,taga,*len_s; 4588 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4589 PetscInt proc,m; 4590 PetscInt **buf_ri,**buf_rj; 4591 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4592 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4593 MPI_Request *s_waits,*r_waits; 4594 MPI_Status *status; 4595 MatScalar *aa=a->a; 4596 MatScalar **abuf_r,*ba_i; 4597 Mat_Merge_SeqsToMPI *merge; 4598 PetscContainer container; 4599 4600 PetscFunctionBegin; 4601 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4602 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4603 4604 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4605 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4606 4607 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4608 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4609 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4610 4611 bi = merge->bi; 4612 bj = merge->bj; 4613 buf_ri = merge->buf_ri; 4614 buf_rj = merge->buf_rj; 4615 4616 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4617 owners = merge->rowmap->range; 4618 len_s = merge->len_s; 4619 4620 /* send and recv matrix values */ 4621 /*-----------------------------*/ 4622 ierr = 
PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4623 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4624 4625 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4626 for (proc=0,k=0; proc<size; proc++) { 4627 if (!len_s[proc]) continue; 4628 i = owners[proc]; 4629 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4630 k++; 4631 } 4632 4633 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4634 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4635 ierr = PetscFree(status);CHKERRQ(ierr); 4636 4637 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4638 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4639 4640 /* insert mat values of mpimat */ 4641 /*----------------------------*/ 4642 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4643 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4644 4645 for (k=0; k<merge->nrecv; k++) { 4646 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4647 nrows = *(buf_ri_k[k]); 4648 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4649 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4650 } 4651 4652 /* set values of ba */ 4653 m = merge->rowmap->n; 4654 for (i=0; i<m; i++) { 4655 arow = owners[rank] + i; 4656 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4657 bnzi = bi[i+1] - bi[i]; 4658 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4659 4660 /* add local non-zero vals of this proc's seqmat into ba */ 4661 anzi = ai[arow+1] - ai[arow]; 4662 aj = a->j + ai[arow]; 4663 aa = a->a + ai[arow]; 4664 nextaj = 0; 4665 for (j=0; nextaj<anzi; j++) { 4666 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4667 ba_i[j] += aa[nextaj++]; 4668 } 4669 } 4670 4671 /* add received vals into ba */ 4672 for (k=0; 
k<merge->nrecv; k++) { /* k-th received message */ 4673 /* i-th row */ 4674 if (i == *nextrow[k]) { 4675 anzi = *(nextai[k]+1) - *nextai[k]; 4676 aj = buf_rj[k] + *(nextai[k]); 4677 aa = abuf_r[k] + *(nextai[k]); 4678 nextaj = 0; 4679 for (j=0; nextaj<anzi; j++) { 4680 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4681 ba_i[j] += aa[nextaj++]; 4682 } 4683 } 4684 nextrow[k]++; nextai[k]++; 4685 } 4686 } 4687 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4688 } 4689 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4690 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4691 4692 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4693 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4694 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4695 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4696 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4697 PetscFunctionReturn(0); 4698 } 4699 4700 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4701 { 4702 PetscErrorCode ierr; 4703 Mat B_mpi; 4704 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4705 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4706 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4707 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4708 PetscInt len,proc,*dnz,*onz,bs,cbs; 4709 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4710 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4711 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4712 MPI_Status *status; 4713 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4714 PetscBT lnkbt; 4715 Mat_Merge_SeqsToMPI *merge; 4716 PetscContainer container; 4717 4718 PetscFunctionBegin; 4719 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4720 4721 /* make sure it is a PETSc comm */ 4722 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4723 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4724 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4725 4726 ierr = PetscNew(&merge);CHKERRQ(ierr); 4727 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4728 4729 /* determine row ownership */ 4730 /*---------------------------------------------------------*/ 4731 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4732 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4733 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4734 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4735 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4736 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4737 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4738 4739 m = merge->rowmap->n; 4740 owners = merge->rowmap->range; 4741 4742 /* determine the number of messages to send, their lengths */ 4743 /*---------------------------------------------------------*/ 4744 len_s = merge->len_s; 4745 4746 len = 0; /* length of buf_si[] */ 4747 merge->nsend = 0; 4748 for (proc=0; proc<size; proc++) { 4749 len_si[proc] = 0; 4750 if (proc == rank) { 4751 len_s[proc] = 0; 4752 } else { 4753 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4754 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4755 } 4756 if (len_s[proc]) { 4757 merge->nsend++; 4758 nrows = 0; 4759 for (i=owners[proc]; i<owners[proc+1]; i++) { 4760 if (ai[i+1] > ai[i]) nrows++; 4761 } 4762 len_si[proc] = 2*(nrows+1); 4763 len += len_si[proc]; 4764 } 4765 } 4766 4767 /* determine the number and length of messages to receive for ij-structure */ 4768 /*-------------------------------------------------------------------------*/ 4769 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4770 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4771 4772 /* post the Irecv of j-structure */ 4773 
/*-------------------------------*/ 4774 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4775 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4776 4777 /* post the Isend of j-structure */ 4778 /*--------------------------------*/ 4779 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4780 4781 for (proc=0, k=0; proc<size; proc++) { 4782 if (!len_s[proc]) continue; 4783 i = owners[proc]; 4784 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4785 k++; 4786 } 4787 4788 /* receives and sends of j-structure are complete */ 4789 /*------------------------------------------------*/ 4790 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4791 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4792 4793 /* send and recv i-structure */ 4794 /*---------------------------*/ 4795 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4796 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4797 4798 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4799 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4800 for (proc=0,k=0; proc<size; proc++) { 4801 if (!len_s[proc]) continue; 4802 /* form outgoing message for i-structure: 4803 buf_si[0]: nrows to be sent 4804 [1:nrows]: row index (global) 4805 [nrows+1:2*nrows+1]: i-structure index 4806 */ 4807 /*-------------------------------------------*/ 4808 nrows = len_si[proc]/2 - 1; 4809 buf_si_i = buf_si + nrows+1; 4810 buf_si[0] = nrows; 4811 buf_si_i[0] = 0; 4812 nrows = 0; 4813 for (i=owners[proc]; i<owners[proc+1]; i++) { 4814 anzi = ai[i+1] - ai[i]; 4815 if (anzi) { 4816 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4817 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4818 nrows++; 4819 } 4820 } 4821 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4822 k++; 4823 buf_si += len_si[proc]; 4824 } 4825 4826 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4827 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4828 4829 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4830 for (i=0; i<merge->nrecv; i++) { 4831 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4832 } 4833 4834 ierr = PetscFree(len_si);CHKERRQ(ierr); 4835 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4836 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4837 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4838 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4839 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4840 ierr = PetscFree(status);CHKERRQ(ierr); 4841 4842 /* compute a local seq matrix in each processor */ 4843 /*----------------------------------------------*/ 4844 /* allocate bi array and free space for accumulating nonzero column info */ 4845 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4846 bi[0] = 0; 4847 4848 /* create and initialize a linked list */ 4849 nlnk = N+1; 4850 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4851 4852 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4853 len = ai[owners[rank+1]] - ai[owners[rank]]; 4854 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4855 4856 current_space = free_space; 4857 4858 /* determine symbolic info for each local row */ 4859 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4860 4861 for (k=0; k<merge->nrecv; k++) { 4862 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4863 nrows = *buf_ri_k[k]; 4864 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4865 nextai[k] = buf_ri_k[k] + (nrows + 1); /* 
poins to the next i-structure of k-th recved i-structure */ 4866 } 4867 4868 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4869 len = 0; 4870 for (i=0; i<m; i++) { 4871 bnzi = 0; 4872 /* add local non-zero cols of this proc's seqmat into lnk */ 4873 arow = owners[rank] + i; 4874 anzi = ai[arow+1] - ai[arow]; 4875 aj = a->j + ai[arow]; 4876 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4877 bnzi += nlnk; 4878 /* add received col data into lnk */ 4879 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4880 if (i == *nextrow[k]) { /* i-th row */ 4881 anzi = *(nextai[k]+1) - *nextai[k]; 4882 aj = buf_rj[k] + *nextai[k]; 4883 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4884 bnzi += nlnk; 4885 nextrow[k]++; nextai[k]++; 4886 } 4887 } 4888 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4889 4890 /* if free space is not available, make more free space */ 4891 if (current_space->local_remaining<bnzi) { 4892 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4893 nspacedouble++; 4894 } 4895 /* copy data into free space, then initialize lnk */ 4896 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4897 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4898 4899 current_space->array += bnzi; 4900 current_space->local_used += bnzi; 4901 current_space->local_remaining -= bnzi; 4902 4903 bi[i+1] = bi[i] + bnzi; 4904 } 4905 4906 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4907 4908 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4909 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4910 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4911 4912 /* create symbolic parallel matrix B_mpi */ 4913 /*---------------------------------------*/ 4914 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4915 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4916 if 
(n==PETSC_DECIDE) { 4917 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4918 } else { 4919 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4920 } 4921 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4922 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4923 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4924 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4925 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4926 4927 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4928 B_mpi->assembled = PETSC_FALSE; 4929 merge->bi = bi; 4930 merge->bj = bj; 4931 merge->buf_ri = buf_ri; 4932 merge->buf_rj = buf_rj; 4933 merge->coi = NULL; 4934 merge->coj = NULL; 4935 merge->owners_co = NULL; 4936 4937 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4938 4939 /* attach the supporting struct to B_mpi for reuse */ 4940 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4941 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4942 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4943 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4944 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4945 *mpimat = B_mpi; 4946 4947 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4948 PetscFunctionReturn(0); 4949 } 4950 4951 /*@C 4952 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4953 matrices from each processor 4954 4955 Collective 4956 4957 Input Parameters: 4958 + comm - the communicators the parallel matrix will live on 4959 . seqmat - the input sequential matrices 4960 . m - number of local rows (or PETSC_DECIDE) 4961 . n - number of local columns (or PETSC_DECIDE) 4962 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4963 4964 Output Parameter: 4965 . 
mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    nproc;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&nproc);CHKERRQ(ierr);

  /* The whole operation, whichever path is taken, is timed under MAT_Seqstompi */
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (nproc == 1) {
    /* One process: there is nothing to sum, so just duplicate (first call) or copy (reuse) seqmat */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    /* Several processes: the symbolic phase runs only on the first call, the numeric phase always */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
. 
A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *diag,*offd,*loc;
  PetscInt       *garray = mpimat->garray;   /* off-diagonal local column -> global column */
  PetscInt       *ai,*aj,*bi,*bj,*ci,*cj;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am = A->rmap->n,cstart = A->cmap->rstart;
  PetscInt       row,t,nd,no,nlow;
  PetscInt       apos,bpos,cpos;             /* running positions in the diag, off-diag and merged arrays */
  PetscMPIInt    size;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (size == 1) {
    /* Single process: hand out (or copy from) the diagonal block directly */
    switch (scall) {
    case MAT_INITIAL_MATRIX:
      ierr   = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
      *A_loc = mpimat->A;
      break;
    case MAT_REUSE_MATRIX:
      ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      break;
    default: /* any other MatReuse value is silently ignored on this path */
      break;
    }
    PetscFunctionReturn(0);
  }

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  diag = (Mat_SeqAIJ*)(mpimat->A)->data;
  offd = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai   = diag->i; aj = diag->j; aa = diag->a;
  bi   = offd->i; bj = offd->j; ba = offd->a;

  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diagonal plus its off-diagonal entries */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (row=0; row<am; row++) {
      ci[row+1] = ci[row] + (ai[row+1] - ai[row]) + (bi[row+1] - bi[row]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);

    apos = 0; bpos = 0; cpos = 0;
    for (row=0; row<am; row++) {
      no = bi[row+1] - bi[row];
      nd = ai[row+1] - ai[row];

      /* off-diagonal entries whose global column lies left of the diagonal block */
      nlow = 0;
      while (nlow < no && garray[bj[bpos]] < cstart) {
        cj[cpos]   = garray[bj[bpos]];
        ca[cpos++] = ba[bpos++];
        nlow++;
      }
      /* diagonal block entries, shifted to global column numbering */
      for (t=0; t<nd; t++) {
        cj[cpos]   = cstart + aj[apos];
        ca[cpos++] = aa[apos++];
      }
      /* remaining off-diagonal entries (right of the diagonal block) */
      for (t=nlow; t<no; t++) {
        cj[cpos]   = garray[bj[bpos]];
        ca[cpos++] = ba[bpos++];
      }
    }

    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    loc          = (Mat_SeqAIJ*)(*A_loc)->data;
    loc->free_a  = PETSC_TRUE;
    loc->free_ij = PETSC_TRUE;
    loc->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Same traversal as above, but only the values of the existing matrix are overwritten */
    loc  = (Mat_SeqAIJ*)(*A_loc)->data;
    cam  = loc->a;
    apos = 0; bpos = 0; cpos = 0;
    for (row=0; row<am; row++) {
      /* off-diagonal entries left of the diagonal block */
      no   = bi[row+1] - bi[row];
      nlow = 0;
      while (nlow < no && garray[bj[bpos]] < cstart) {
        cam[cpos++] = ba[bpos++];
        nlow++;
      }
      /* diagonal block entries */
      nd = ai[row+1] - ai[row];
      for (t=0; t<nd; t++) cam[cpos++] = aa[apos++];
      /* off-diagonal entries right of the diagonal block */
      for (t=nlow; t<no; t++) cam[cpos++] = ba[bpos++];
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
. 
A_loc - the local sequential matrix generated 5131 5132 Level: developer 5133 5134 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5135 5136 @*/ 5137 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5138 { 5139 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5140 PetscErrorCode ierr; 5141 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5142 IS isrowa,iscola; 5143 Mat *aloc; 5144 PetscBool match; 5145 5146 PetscFunctionBegin; 5147 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5148 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5149 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5150 if (!row) { 5151 start = A->rmap->rstart; end = A->rmap->rend; 5152 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5153 } else { 5154 isrowa = *row; 5155 } 5156 if (!col) { 5157 start = A->cmap->rstart; 5158 cmap = a->garray; 5159 nzA = a->A->cmap->n; 5160 nzB = a->B->cmap->n; 5161 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5162 ncols = 0; 5163 for (i=0; i<nzB; i++) { 5164 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5165 else break; 5166 } 5167 imark = i; 5168 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5169 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5170 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5171 } else { 5172 iscola = *col; 5173 } 5174 if (scall != MAT_INITIAL_MATRIX) { 5175 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5176 aloc[0] = *A_loc; 5177 } 5178 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5179 if (!col) { /* attach global id of condensed columns */ 5180 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5181 } 5182 *A_loc = aloc[0]; 5183 ierr = PetscFree(aloc);CHKERRQ(ierr); 5184 if (!row) { 5185 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5186 } 5187 if (!col) { 5188 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5189 } 5190 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5191 PetscFunctionReturn(0); 5192 } 5193 5194 /* 5195 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5196 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5197 * on a global size. 5198 * */ 5199 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5200 { 5201 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5202 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5203 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5204 PetscMPIInt owner; 5205 PetscSFNode *iremote,*oiremote; 5206 const PetscInt *lrowindices; 5207 PetscErrorCode ierr; 5208 PetscSF sf,osf; 5209 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5210 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5211 MPI_Comm comm; 5212 ISLocalToGlobalMapping mapping; 5213 5214 PetscFunctionBegin; 5215 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5216 /* plocalsize is the number of roots 5217 * nrows is the number of leaves 5218 * */ 5219 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5220 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5221 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5222 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5223 for (i=0;i<nrows;i++) { 5224 /* Find a remote index and an owner for a row 5225 * The row could be local or remote 5226 * */ 5227 owner = 0; 5228 lidx = 0; 5229 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5230 iremote[i].index = lidx; 5231 iremote[i].rank = owner; 5232 } 5233 /* Create SF to communicate how many nonzero columns for each row */ 5234 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5235 /* SF will figure out the number of 
nonzero colunms for each row, and their 5236 * offsets 5237 * */ 5238 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5239 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5240 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5241 5242 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5243 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5244 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5245 roffsets[0] = 0; 5246 roffsets[1] = 0; 5247 for (i=0;i<plocalsize;i++) { 5248 /* diag */ 5249 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5250 /* off diag */ 5251 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5252 /* compute offsets so that we relative location for each row */ 5253 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5254 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5255 } 5256 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5257 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5258 /* 'r' means root, and 'l' means leaf */ 5259 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5260 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5261 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5262 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5263 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5264 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5265 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5266 dntotalcols = 0; 5267 ontotalcols = 0; 5268 ncol = 0; 5269 for (i=0;i<nrows;i++) { 5270 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5271 ncol = PetscMax(pnnz[i],ncol); 5272 /* diag */ 5273 dntotalcols += nlcols[i*2+0]; 5274 /* off diag */ 5275 ontotalcols += nlcols[i*2+1]; 5276 } 5277 /* We do not need to figure the right number of columns 5278 * since all the calculations will be done by going through the raw data 5279 * */ 5280 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5281 ierr = 
MatSetUp(*P_oth);CHKERRQ(ierr); 5282 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5283 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5284 /* diag */ 5285 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5286 /* off diag */ 5287 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5288 /* diag */ 5289 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5290 /* off diag */ 5291 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5292 dntotalcols = 0; 5293 ontotalcols = 0; 5294 ntotalcols = 0; 5295 for (i=0;i<nrows;i++) { 5296 owner = 0; 5297 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5298 /* Set iremote for diag matrix */ 5299 for (j=0;j<nlcols[i*2+0];j++) { 5300 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5301 iremote[dntotalcols].rank = owner; 5302 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5303 ilocal[dntotalcols++] = ntotalcols++; 5304 } 5305 /* off diag */ 5306 for (j=0;j<nlcols[i*2+1];j++) { 5307 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5308 oiremote[ontotalcols].rank = owner; 5309 oilocal[ontotalcols++] = ntotalcols++; 5310 } 5311 } 5312 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5313 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5314 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5315 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5316 /* P serves as roots and P_oth is leaves 5317 * Diag matrix 5318 * */ 5319 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5320 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5321 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5322 5323 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5324 /* Off diag */ 5325 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5326 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5327 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5328 /* We operate on the matrix 
internal data for saving memory */ 5329 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5330 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5331 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5332 /* Convert to global indices for diag matrix */ 5333 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5334 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5335 /* We want P_oth store global indices */ 5336 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5337 /* Use memory scalable approach */ 5338 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5339 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5340 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5341 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5342 /* Convert back to local indices */ 5343 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5344 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5345 nout = 0; 5346 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5347 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5348 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5349 /* Exchange values */ 5350 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5351 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5352 /* Stop PETSc from shrinking memory */ 5353 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5354 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5355 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5356 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5357 ierr = 
PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5358 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5359 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5360 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5361 PetscFunctionReturn(0); 5362 } 5363 5364 /* 5365 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5366 * This supports MPIAIJ and MAIJ 5367 * */ 5368 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5369 { 5370 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5371 Mat_SeqAIJ *p_oth; 5372 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5373 IS rows,map; 5374 PetscHMapI hamp; 5375 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5376 MPI_Comm comm; 5377 PetscSF sf,osf; 5378 PetscBool has; 5379 PetscErrorCode ierr; 5380 5381 PetscFunctionBegin; 5382 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5383 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5384 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5385 * and then create a submatrix (that often is an overlapping matrix) 5386 * */ 5387 if (reuse == MAT_INITIAL_MATRIX) { 5388 /* Use a hash table to figure out unique keys */ 5389 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5390 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5391 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5392 count = 0; 5393 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5394 for (i=0;i<a->B->cmap->n;i++) { 5395 key = a->garray[i]/dof; 5396 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5397 if (!has) { 5398 mapping[i] = count; 5399 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5400 } else { 5401 /* Current 'i' has the same value the previous step */ 5402 mapping[i] = count-1; 5403 } 5404 } 5405 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5406 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5407 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5408 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5409 off = 0; 5410 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5411 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5412 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5413 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5414 /* In case, the matrix was already created but users want to recreate the matrix */ 5415 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5416 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5417 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5418 ierr = ISDestroy(&map);CHKERRQ(ierr); 5419 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5420 } else if (reuse == MAT_REUSE_MATRIX) { 5421 /* If matrix was already created, we simply update values using SF objects 5422 * that as attached to the matrix ealier. 
5423 * */ 5424 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5425 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5426 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5427 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5428 /* Update values in place */ 5429 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5430 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5431 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5432 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5433 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5434 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5435 PetscFunctionReturn(0); 5436 } 5437 5438 /*@C 5439 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5440 5441 Collective on Mat 5442 5443 Input Parameters: 5444 + A,B - the matrices in mpiaij format 5445 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *rowsel,k,cstart,cnt,nd,no,nlow,*garray;
  IS             isrowb,iscolb;
  Mat            *bseq = NULL;

  PetscFunctionBegin;
  /* The column layout of A must coincide with the row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Rows of B to fetch = columns of local A: the diagonal-block columns merged with garray[] */
    cstart = A->cmap->rstart;
    garray = a->garray;
    nd     = a->A->cmap->n;
    no     = a->B->cmap->n;
    ierr   = PetscMalloc1(nd+no, &rowsel);CHKERRQ(ierr);
    cnt    = 0;
    /* row < local row index */
    for (nlow=0; nlow<no && garray[nlow] < cstart; nlow++) rowsel[cnt++] = garray[nlow];
    /* local rows */
    for (k=0; k<nd; k++) rowsel[cnt++] = cstart + k;
    /* row > local row index */
    for (k=nlow; k<no; k++) rowsel[cnt++] = garray[k];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,cnt,rowsel,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    /* every column of B is kept */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb;
    iscolb  = *colb;
    /* MatCreateSubMatrices works on an array of matrices: wrap *B_seq in a one-entry array */
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);

  /* Hand each index set back to the caller if it asked for it, otherwise release it */
  if (rowb) {
    *rowb = isrowb;
  } else {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  }
  if (colb) {
    *colb = iscolb;
  } else {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5526 5527 Level: developer 5528 5529 */ 5530 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5531 { 5532 PetscErrorCode ierr; 5533 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5534 Mat_SeqAIJ *b_oth; 5535 VecScatter ctx; 5536 MPI_Comm comm; 5537 const PetscMPIInt *rprocs,*sprocs; 5538 const PetscInt *srow,*rstarts,*sstarts; 5539 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5540 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5541 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5542 MPI_Request *rwaits = NULL,*swaits = NULL; 5543 MPI_Status rstatus; 5544 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5545 5546 PetscFunctionBegin; 5547 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5548 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5549 5550 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5551 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5552 } 5553 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5554 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5555 5556 if (size == 1) { 5557 startsj_s = NULL; 5558 bufa_ptr = NULL; 5559 *B_oth = NULL; 5560 PetscFunctionReturn(0); 5561 } 5562 5563 ctx = a->Mvctx; 5564 tag = ((PetscObject)ctx)->tag; 5565 5566 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5567 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5568 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5569 ierr = 
VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5570 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5571 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5572 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5573 5574 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5575 if (scall == MAT_INITIAL_MATRIX) { 5576 /* i-array */ 5577 /*---------*/ 5578 /* post receives */ 5579 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5580 for (i=0; i<nrecvs; i++) { 5581 rowlen = rvalues + rstarts[i]*rbs; 5582 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5583 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5584 } 5585 5586 /* pack the outgoing message */ 5587 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5588 5589 sstartsj[0] = 0; 5590 rstartsj[0] = 0; 5591 len = 0; /* total length of j or a array to be sent */ 5592 if (nsends) { 5593 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5594 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5595 } 5596 for (i=0; i<nsends; i++) { 5597 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5598 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5599 for (j=0; j<nrows; j++) { 5600 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5601 for (l=0; l<sbs; l++) { 5602 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5603 5604 rowlen[j*sbs+l] = ncols; 5605 5606 len += ncols; 5607 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5608 } 5609 k++; 5610 } 5611 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5612 5613 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in 
bufj and bufa */ 5614 } 5615 /* recvs and sends of i-array are completed */ 5616 i = nrecvs; 5617 while (i--) { 5618 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5619 } 5620 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5621 ierr = PetscFree(svalues);CHKERRQ(ierr); 5622 5623 /* allocate buffers for sending j and a arrays */ 5624 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5625 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5626 5627 /* create i-array of B_oth */ 5628 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5629 5630 b_othi[0] = 0; 5631 len = 0; /* total length of j or a array to be received */ 5632 k = 0; 5633 for (i=0; i<nrecvs; i++) { 5634 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5635 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5636 for (j=0; j<nrows; j++) { 5637 b_othi[k+1] = b_othi[k] + rowlen[j]; 5638 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5639 k++; 5640 } 5641 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5642 } 5643 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5644 5645 /* allocate space for j and a arrrays of B_oth */ 5646 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5647 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5648 5649 /* j-array */ 5650 /*---------*/ 5651 /* post receives of j-array */ 5652 for (i=0; i<nrecvs; i++) { 5653 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5654 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5655 } 5656 5657 /* pack the outgoing message j-array */ 5658 if (nsends) k = sstarts[0]; 5659 for (i=0; i<nsends; i++) { 5660 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5661 bufJ = bufj+sstartsj[i]; 5662 for (j=0; j<nrows; j++) { 5663 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5664 for (ll=0; ll<sbs; ll++) { 5665 ierr = 
MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5666 for (l=0; l<ncols; l++) { 5667 *bufJ++ = cols[l]; 5668 } 5669 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5670 } 5671 } 5672 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5673 } 5674 5675 /* recvs and sends of j-array are completed */ 5676 i = nrecvs; 5677 while (i--) { 5678 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5679 } 5680 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5681 } else if (scall == MAT_REUSE_MATRIX) { 5682 sstartsj = *startsj_s; 5683 rstartsj = *startsj_r; 5684 bufa = *bufa_ptr; 5685 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5686 b_otha = b_oth->a; 5687 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5688 5689 /* a-array */ 5690 /*---------*/ 5691 /* post receives of a-array */ 5692 for (i=0; i<nrecvs; i++) { 5693 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5694 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5695 } 5696 5697 /* pack the outgoing message a-array */ 5698 if (nsends) k = sstarts[0]; 5699 for (i=0; i<nsends; i++) { 5700 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5701 bufA = bufa+sstartsj[i]; 5702 for (j=0; j<nrows; j++) { 5703 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5704 for (ll=0; ll<sbs; ll++) { 5705 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5706 for (l=0; l<ncols; l++) { 5707 *bufA++ = vals[l]; 5708 } 5709 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5710 } 5711 } 5712 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5713 } 5714 /* recvs and sends of a-array are completed */ 5715 i = nrecvs; 5716 while (i--) { 5717 ierr = 
MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5718 } 5719 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5720 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5721 5722 if (scall == MAT_INITIAL_MATRIX) { 5723 /* put together the new matrix */ 5724 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5725 5726 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5727 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5728 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5729 b_oth->free_a = PETSC_TRUE; 5730 b_oth->free_ij = PETSC_TRUE; 5731 b_oth->nonew = 0; 5732 5733 ierr = PetscFree(bufj);CHKERRQ(ierr); 5734 if (!startsj_s || !bufa_ptr) { 5735 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5736 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5737 } else { 5738 *startsj_s = sstartsj; 5739 *startsj_r = rstartsj; 5740 *bufa_ptr = bufa; 5741 } 5742 } 5743 5744 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5745 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5746 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5747 PetscFunctionReturn(0); 5748 } 5749 5750 /*@C 5751 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5752 5753 Not Collective 5754 5755 Input Parameters: 5756 . A - The matrix in mpiaij format 5757 5758 Output Parameter: 5759 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5760 . 
colmap - A map from global column index to local index into lvec 5761 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5762 5763 Level: developer 5764 5765 @*/ 5766 #if defined(PETSC_USE_CTABLE) 5767 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5768 #else 5769 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5770 #endif 5771 { 5772 Mat_MPIAIJ *a; 5773 5774 PetscFunctionBegin; 5775 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5776 PetscValidPointer(lvec, 2); 5777 PetscValidPointer(colmap, 3); 5778 PetscValidPointer(multScatter, 4); 5779 a = (Mat_MPIAIJ*) A->data; 5780 if (lvec) *lvec = a->lvec; 5781 if (colmap) *colmap = a->colmap; 5782 if (multScatter) *multScatter = a->Mvctx; 5783 PetscFunctionReturn(0); 5784 } 5785 5786 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5787 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5788 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5789 #if defined(PETSC_HAVE_MKL_SPARSE) 5790 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5791 #endif 5792 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5793 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5794 #if defined(PETSC_HAVE_ELEMENTAL) 5795 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5796 #endif 5797 #if defined(PETSC_HAVE_SCALAPACK) 5798 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5799 #endif 5800 #if defined(PETSC_HAVE_HYPRE) 5801 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5802 #endif 5803 #if defined(PETSC_HAVE_CUDA) 5804 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5805 
#endif 5806 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5807 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5808 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5809 5810 /* 5811 Computes (B'*A')' since computing B*A directly is untenable 5812 5813 n p p 5814 [ ] [ ] [ ] 5815 m [ A ] * n [ B ] = m [ C ] 5816 [ ] [ ] [ ] 5817 5818 */ 5819 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5820 { 5821 PetscErrorCode ierr; 5822 Mat At,Bt,Ct; 5823 5824 PetscFunctionBegin; 5825 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5826 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5827 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5828 ierr = MatDestroy(&At);CHKERRQ(ierr); 5829 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5830 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5831 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5832 PetscFunctionReturn(0); 5833 } 5834 5835 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5836 { 5837 PetscErrorCode ierr; 5838 PetscBool cisdense; 5839 5840 PetscFunctionBegin; 5841 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5842 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5843 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5844 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5845 if (!cisdense) { 5846 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5847 } 5848 ierr = MatSetUp(C);CHKERRQ(ierr); 5849 5850 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5851 PetscFunctionReturn(0); 5852 } 5853 5854 /* ----------------------------------------------------------------*/ 5855 static PetscErrorCode 
MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5856 { 5857 Mat_Product *product = C->product; 5858 Mat A = product->A,B=product->B; 5859 5860 PetscFunctionBegin; 5861 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5862 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5863 5864 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5865 C->ops->productsymbolic = MatProductSymbolic_AB; 5866 PetscFunctionReturn(0); 5867 } 5868 5869 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5870 { 5871 PetscErrorCode ierr; 5872 Mat_Product *product = C->product; 5873 5874 PetscFunctionBegin; 5875 if (product->type == MATPRODUCT_AB) { 5876 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 5877 } 5878 PetscFunctionReturn(0); 5879 } 5880 /* ----------------------------------------------------------------*/ 5881 5882 /*MC 5883 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5884 5885 Options Database Keys: 5886 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5887 5888 Level: beginner 5889 5890 Notes: 5891 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 5892 in this case the values associated with the rows and columns one passes in are set to zero 5893 in the matrix 5894 5895 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 5896 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 5897 5898 .seealso: MatCreateAIJ() 5899 M*/ 5900 5901 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5902 { 5903 Mat_MPIAIJ *b; 5904 PetscErrorCode ierr; 5905 PetscMPIInt size; 5906 5907 PetscFunctionBegin; 5908 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5909 5910 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5911 B->data = (void*)b; 5912 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5913 B->assembled = PETSC_FALSE; 5914 B->insertmode = NOT_SET_VALUES; 5915 b->size = size; 5916 5917 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5918 5919 /* build cache for off array entries formed */ 5920 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5921 5922 b->donotstash = PETSC_FALSE; 5923 b->colmap = NULL; 5924 b->garray = NULL; 5925 b->roworiented = PETSC_TRUE; 5926 5927 /* stuff used for matrix vector multiply */ 5928 b->lvec = NULL; 5929 b->Mvctx = NULL; 5930 5931 /* stuff for MatGetRow() */ 5932 b->rowindices = NULL; 5933 b->rowvalues = NULL; 5934 b->getrowactive = PETSC_FALSE; 5935 5936 /* flexible pointer used in CUSP/CUSPARSE classes */ 5937 b->spptr = NULL; 5938 5939 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5940 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5941 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5942 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5943 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5944 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5945 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5946 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5947 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5948 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5949 #if defined(PETSC_HAVE_MKL_SPARSE) 5950 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5951 #endif 5952 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5953 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 5954 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5955 #if defined(PETSC_HAVE_ELEMENTAL) 5956 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5957 #endif 5958 #if defined(PETSC_HAVE_SCALAPACK) 5959 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 5960 #endif 5961 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5962 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5963 #if defined(PETSC_HAVE_HYPRE) 5964 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5965 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5966 #endif 5967 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 5968 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 5969 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5970 PetscFunctionReturn(0); 5971 } 5972 5973 /*@C 5974 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5975 and "off-diagonal" part of the matrix in CSR format. 5976 5977 Collective 5978 5979 Input Parameters: 5980 + comm - MPI communicator 5981 . m - number of local rows (Cannot be PETSC_DECIDE) 5982 . n - This value should be the same as the local size used in creating the 5983 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5984 calculated if N is given) For square matrices n is almost always m. 5985 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5986 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5987 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5988 . j - column indices 5989 . a - matrix values 5990 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5991 . oj - column indices 5992 - oa - matrix values 5993 5994 Output Parameter: 5995 . mat - the matrix 5996 5997 Level: advanced 5998 5999 Notes: 6000 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6001 must free the arrays once the matrix has been destroyed and not before. 
6002 6003 The i and j indices are 0 based 6004 6005 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6006 6007 This sets local rows and cannot be used to set off-processor values. 6008 6009 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6010 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6011 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6012 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6013 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6014 communication if it is known that only local entries will be set. 6015 6016 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6017 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6018 @*/ 6019 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6020 { 6021 PetscErrorCode ierr; 6022 Mat_MPIAIJ *maij; 6023 6024 PetscFunctionBegin; 6025 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6026 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6027 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6028 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6029 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6030 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6031 maij = (Mat_MPIAIJ*) (*mat)->data; 6032 6033 (*mat)->preallocated = PETSC_TRUE; 6034 6035 ierr = 
PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6036 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6037 6038 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6039 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6040 6041 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6042 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6043 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6044 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6045 6046 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6047 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6048 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6049 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6050 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6051 PetscFunctionReturn(0); 6052 } 6053 6054 /* 6055 Special version for direct calls from Fortran 6056 */ 6057 #include <petsc/private/fortranimpl.h> 6058 6059 /* Change these macros so can be used in void function */ 6060 #undef CHKERRQ 6061 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6062 #undef SETERRQ2 6063 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6064 #undef SETERRQ3 6065 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6066 #undef SETERRQ 6067 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6068 6069 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6070 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6071 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6072 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6073 #else 6074 #endif 6075 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6076 { 6077 Mat mat = *mmat; 6078 PetscInt m = *mm, n = *mn; 6079 
InsertMode addv = *maddv; 6080 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6081 PetscScalar value; 6082 PetscErrorCode ierr; 6083 6084 MatCheckPreallocated(mat,1); 6085 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6086 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6087 { 6088 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6089 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6090 PetscBool roworiented = aij->roworiented; 6091 6092 /* Some Variables required in the macro */ 6093 Mat A = aij->A; 6094 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6095 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6096 MatScalar *aa = a->a; 6097 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6098 Mat B = aij->B; 6099 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6100 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6101 MatScalar *ba = b->a; 6102 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6103 * cannot use "#if defined" inside a macro. 
*/ 6104 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6105 6106 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6107 PetscInt nonew = a->nonew; 6108 MatScalar *ap1,*ap2; 6109 6110 PetscFunctionBegin; 6111 for (i=0; i<m; i++) { 6112 if (im[i] < 0) continue; 6113 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6114 if (im[i] >= rstart && im[i] < rend) { 6115 row = im[i] - rstart; 6116 lastcol1 = -1; 6117 rp1 = aj + ai[row]; 6118 ap1 = aa + ai[row]; 6119 rmax1 = aimax[row]; 6120 nrow1 = ailen[row]; 6121 low1 = 0; 6122 high1 = nrow1; 6123 lastcol2 = -1; 6124 rp2 = bj + bi[row]; 6125 ap2 = ba + bi[row]; 6126 rmax2 = bimax[row]; 6127 nrow2 = bilen[row]; 6128 low2 = 0; 6129 high2 = nrow2; 6130 6131 for (j=0; j<n; j++) { 6132 if (roworiented) value = v[i*n+j]; 6133 else value = v[i+j*m]; 6134 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6135 if (in[j] >= cstart && in[j] < cend) { 6136 col = in[j] - cstart; 6137 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6138 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6139 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6140 #endif 6141 } else if (in[j] < 0) continue; 6142 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6143 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6144 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6145 } else { 6146 if (mat->was_assembled) { 6147 if (!aij->colmap) { 6148 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6149 } 6150 #if defined(PETSC_USE_CTABLE) 6151 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6152 col--; 6153 #else 6154 col = aij->colmap[in[j]] - 1; 6155 #endif 6156 if (col < 
0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6157 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6158 col = in[j]; 6159 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6160 B = aij->B; 6161 b = (Mat_SeqAIJ*)B->data; 6162 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6163 rp2 = bj + bi[row]; 6164 ap2 = ba + bi[row]; 6165 rmax2 = bimax[row]; 6166 nrow2 = bilen[row]; 6167 low2 = 0; 6168 high2 = nrow2; 6169 bm = aij->B->rmap->n; 6170 ba = b->a; 6171 inserted = PETSC_FALSE; 6172 } 6173 } else col = in[j]; 6174 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6175 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6176 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6177 #endif 6178 } 6179 } 6180 } else if (!aij->donotstash) { 6181 if (roworiented) { 6182 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6183 } else { 6184 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6185 } 6186 } 6187 } 6188 } 6189 PetscFunctionReturnVoid(); 6190 } 6191