#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
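    /* Layout assumed by the value copies below: in the CSR stream coming from process 0,
       each local row i has sorted column indices and is stored contiguously as

         [ ld[i] off-diagonal entries with column < rstart | diagonal-block entries | off-diagonal entries with column >= rend ]

       so the values can be split between the diagonal block Ad and the off-diagonal block Ao
       by walking gmataa once, without re-examining column indices; the ld[] counts were
       accumulated above for exactly this purpose. */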
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; 
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 662 } else { 663 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 664 } 665 } 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 673 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 674 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 675 */ 676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 679 Mat A = aij->A; /* diagonal part of the matrix */ 680 Mat B = aij->B; /* offdiagonal part of the matrix */ 681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 682 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 683 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 684 PetscInt *ailen = a->ilen,*aj = a->j; 685 PetscInt *bilen = b->ilen,*bj = b->j; 686 PetscInt am = aij->A->rmap->n,j; 687 PetscInt diag_so_far = 0,dnz; 688 PetscInt offd_so_far = 0,onz; 689 690 PetscFunctionBegin; 691 /* Iterate over all rows of the matrix */ 692 for (j=0; j<am; j++) { 693 dnz = onz = 0; 694 /* Iterate over all non-zero columns of the current row */ 695 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 696 /* If column is in the diagonal */ 697 if (mat_j[col] >= cstart && mat_j[col] < cend) { 698 aj[diag_so_far++] = mat_j[col] - cstart; 699 dnz++; 700 } else { /* off-diagonal entries */ 701 bj[offd_so_far++] = mat_j[col]; 702 onz++; 703 } 704 } 705 ailen[j] = dnz; 706 bilen[j] = onz; 707 } 708 PetscFunctionReturn(0); 709 } 710 711 /* 712 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 713 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 714 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 715 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 716 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 717 */ 718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat A = aij->A; /* diagonal part of the matrix */ 722 Mat B = aij->B; /* offdiagonal part of the matrix */ 723 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 725 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 726 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 727 PetscInt *ailen = a->ilen,*aj = a->j; 728 PetscInt *bilen = b->ilen,*bj = b->j; 729 PetscInt am = aij->A->rmap->n,j; 730 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 
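  /* other_disassembled receives the MPI_PROD (logical AND) of was_assembled over all processes;
     see the reduction below.  If it comes back false while this process is still assembled,
     some other process has disassembled its off-process part, so this process must call
     MatDisAssemble_MPIAIJ() as well before everyone can reassemble. */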
PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 } 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = 0; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat 
A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 
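  /* Outline of what follows: (1) build a star forest whose leaves are the caller-supplied
     global rows and whose roots are the locally owned rows, and reduce over it to flag the
     local rows to be zeroed; (2) zero rows and columns of the local diagonal block with
     MatZeroRowsColumns() on l->A; (3) clear the corresponding rows of the off-diagonal block,
     scatter a 0/1 mask of the zeroed rows into the ghost column space, and zero the masked
     columns of B, updating b from the known values of x when both are provided. */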
999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if 
(PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 1114 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 VecScatter Mvctx = a->Mvctx; 1136 1137 PetscFunctionBegin; 1138 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1139 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 /* do nondiagonal part */ 1153 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1154 /* do local part */ 1155 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1156 /* add partial results together */ 1157 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1163 { 1164 MPI_Comm comm; 1165 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1166 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1167 IS Me,Notme; 1168 PetscErrorCode ierr; 1169 PetscInt M,N,first,last,*notme,i; 1170 PetscBool lf; 1171 PetscMPIInt size; 1172 1173 PetscFunctionBegin; 1174 /* Easy test: symmetric diagonal block */ 1175 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1176 ierr = 
MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1177 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1178 if (!*f) PetscFunctionReturn(0); 1179 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1181 if (size == 1) PetscFunctionReturn(0); 1182 1183 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1184 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1185 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1186 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1187 for (i=0; i<first; i++) notme[i] = i; 1188 for (i=last; i<M; i++) notme[i-last+first] = i; 1189 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1190 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1191 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1192 Aoff = Aoffs[0]; 1193 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1194 Boff = Boffs[0]; 1195 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1197 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1199 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1200 ierr = PetscFree(notme);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1205 { 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1214 { 1215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 /* do nondiagonal part */ 1220 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1221 /* do local part */ 1222 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1223 /* add partial results together */ 1224 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /* 1230 This only works correctly for square matrices where the subblock A->A is the 1231 diagonal block 1232 */ 1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1234 { 1235 PetscErrorCode ierr; 1236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1246 { 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1252 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 #if defined(PETSC_USE_LOG) 1263 
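  /* with logging enabled, record the final global dimensions of the matrix being destroyed */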
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1264 #endif 1265 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1266 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1268 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1269 #if defined(PETSC_USE_CTABLE) 1270 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1271 #else 1272 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1273 #endif 1274 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1275 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1276 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1277 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1278 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1279 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1280 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1281 1282 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1292 #if defined(PETSC_HAVE_ELEMENTAL) 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1294 #endif 1295 #if defined(PETSC_HAVE_HYPRE) 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1301 PetscFunctionReturn(0); 1302 } 1303 1304 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1305 { 1306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1307 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1308 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1309 const PetscInt *garray = aij->garray; 1310 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1311 PetscInt *rowlens; 1312 PetscInt *colidxs; 1313 PetscScalar *matvals; 1314 PetscErrorCode ierr; 1315 1316 PetscFunctionBegin; 1317 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1318 1319 M = mat->rmap->N; 1320 N = mat->cmap->N; 1321 m = mat->rmap->n; 1322 rs = mat->rmap->rstart; 1323 cs = mat->cmap->rstart; 1324 nz = A->nz + B->nz; 1325 1326 /* write matrix header */ 1327 header[0] = MAT_FILE_CLASSID; 1328 header[1] = M; header[2] = N; header[3] = nz; 1329 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1330 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1331 1332 /* fill in and store 
row lengths */ 1333 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1334 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1335 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1336 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1337 1338 /* fill in and store column indices */ 1339 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1340 for (cnt=0, i=0; i<m; i++) { 1341 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1342 if (garray[B->j[jb]] > cs) break; 1343 colidxs[cnt++] = garray[B->j[jb]]; 1344 } 1345 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1346 colidxs[cnt++] = A->j[ja] + cs; 1347 for (; jb<B->i[i+1]; jb++) 1348 colidxs[cnt++] = garray[B->j[jb]]; 1349 } 1350 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1351 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1352 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1353 1354 /* fill in and store nonzero values */ 1355 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1356 for (cnt=0, i=0; i<m; i++) { 1357 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1358 if (garray[B->j[jb]] > cs) break; 1359 matvals[cnt++] = B->a[jb]; 1360 } 1361 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1362 matvals[cnt++] = A->a[ja]; 1363 for (; jb<B->i[i+1]; jb++) 1364 matvals[cnt++] = B->a[jb]; 1365 } 1366 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1367 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1368 ierr = PetscFree(matvals);CHKERRQ(ierr); 1369 1370 /* write block size option to the viewer's .info file */ 1371 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #include <petscdraw.h> 1376 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1377 { 1378 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1379 PetscErrorCode ierr; 1380 PetscMPIInt rank = aij->rank,size = aij->size; 1381 PetscBool isdraw,iascii,isbinary; 1382 PetscViewer sviewer; 1383 PetscViewerFormat format; 1384 1385 PetscFunctionBegin; 1386 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1387 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1389 if (iascii) { 1390 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1391 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1392 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1393 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1394 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1395 for (i=0; i<(PetscInt)size; i++) { 1396 nmax = PetscMax(nmax,nz[i]); 1397 nmin = PetscMin(nmin,nz[i]); 1398 navg += nz[i]; 1399 } 1400 ierr = PetscFree(nz);CHKERRQ(ierr); 1401 navg = navg/size; 1402 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1406 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1407 MatInfo info; 1408 PetscBool inodes; 1409 1410 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1411 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1412 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1413 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1414 if (!inodes) { 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1416 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1419 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1420 } 1421 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1422 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1423 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1425 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1428 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1431 PetscInt inodecount,inodelimit,*inodes; 1432 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1433 if (inodes) { 1434 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1435 } else { 1436 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1440 PetscFunctionReturn(0); 1441 } 1442 } else if (isbinary) { 1443 if (size == 1) { 1444 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1445 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1446 } else { 1447 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } else if (iascii && size == 1) { 1451 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1452 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1453 PetscFunctionReturn(0); 1454 } else if (isdraw) { 1455 PetscDraw draw; 1456 PetscBool isnull; 1457 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1458 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1459 if (isnull) PetscFunctionReturn(0); 1460 } 1461 1462 { /* assemble the entire matrix onto first processor */ 1463 Mat A = NULL, Av; 1464 IS isrow,iscol; 1465 1466 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1467 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1468 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1469 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1470 /* The commented code uses MatCreateSubMatrices instead */ 1471 /* 1472 Mat *AA, A = NULL, Av; 1473 IS isrow,iscol; 1474 1475 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1476 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1477 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1478 if (!rank) { 1479 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1480 A = AA[0]; 1481 Av = AA[0]; 1482 } 1483 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1484 */ 1485 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1486 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1487 /* 1488 Everyone has to call to draw the matrix since the graphics waits are 1489 synchronized across all processors that share the PetscDraw object 1490 */ 1491 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1492 if (!rank) { 1493 if (((PetscObject)mat)->name) { 1494 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1495 } 1496 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1497 } 1498 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1499 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1500 ierr = MatDestroy(&A);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } 1504 1505 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1506 { 1507 PetscErrorCode ierr; 1508 PetscBool iascii,isdraw,issocket,isbinary; 1509 1510 PetscFunctionBegin; 1511 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1512 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1513 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1514 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1515 if (iascii || isdraw || isbinary || issocket) { 1516 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1522 { 1523 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1524 PetscErrorCode ierr; 1525 Vec bb1 = 0; 1526 PetscBool hasop; 1527 1528 PetscFunctionBegin; 1529 if (flag == SOR_APPLY_UPPER) { 1530 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1535 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1536 } 1537 1538 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1539 if (flag & SOR_ZERO_INITIAL_GUESS) { 1540 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1541 its--; 1542 } 1543 1544 while (its--) { 1545 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 1548 /* update rhs: bb1 = bb - B*x */ 1549 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1550 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1551 1552 /* local sweep */ 1553 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1554 } 1555 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1556 if (flag & SOR_ZERO_INITIAL_GUESS) { 1557 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1558 its--; 1559 } 1560 while (its--) { 1561 ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 1564 /* update rhs: bb1 = bb - B*x */ 1565 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1566 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1567 1568 /* local sweep */ 1569 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1570 } 1571 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1572 if (flag & SOR_ZERO_INITIAL_GUESS) { 1573 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1574 its--; 1575 } 1576 while (its--) { 1577 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 1580 /* update rhs: bb1 = bb - B*x */ 1581 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1582 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1583 1584 /* local sweep */ 1585 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1586 } 1587 } else if (flag & SOR_EISENSTAT) { 1588 Vec xx1; 1589 1590 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1591 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1592 1593 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1594 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 if (!mat->diag) { 1596 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1597 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1598 } 1599 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1600 if (hasop) { 1601 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1602 } else { 1603 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1604 } 1605 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1606 1607 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1608 1609 /* local sweep */ 1610 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1611 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1612 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1613 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1614 1615 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1616 1617 matin->factorerrortype = mat->A->factorerrortype; 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1622 { 1623 Mat aA,aB,Aperm; 1624 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1625 PetscScalar *aa,*ba; 1626 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1627 PetscSF rowsf,sf; 1628 IS parcolp = NULL; 1629 PetscBool done; 1630 PetscErrorCode ierr; 1631 1632 PetscFunctionBegin; 1633 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1634 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1635 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1636 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1637 1638 /* Invert row permutation to find out where my rows should go */ 1639 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1640 ierr = 
PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1641 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1642 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1643 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1644 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1645 1646 /* Invert column permutation to find out where my columns should go */ 1647 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1648 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1649 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1650 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1651 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1652 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1654 1655 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1656 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1657 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1658 1659 /* Find out where my gcols should go */ 1660 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1661 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1662 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1663 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1664 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1665 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1666 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1667 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1668 1669 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1670 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1671 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1672 for (i=0; i<m; i++) { 1673 PetscInt row = rdest[i]; 1674 PetscMPIInt rowner; 1675 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1676 for (j=ai[i]; j<ai[i+1]; j++) { 1677 PetscInt col = cdest[aj[j]]; 1678 PetscMPIInt cowner; 1679 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1680 if (rowner == cowner) dnnz[i]++; 1681 else onnz[i]++; 1682 } 1683 for (j=bi[i]; j<bi[i+1]; j++) { 1684 PetscInt col = gcdest[bj[j]]; 1685 PetscMPIInt cowner; 1686 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1687 if (rowner == cowner) dnnz[i]++; 1688 else onnz[i]++; 1689 } 1690 } 1691 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1692 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1693 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1694 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1695 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1696 1697 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1698 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1699 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1700 for (i=0; i<m; i++) { 1701 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1702 PetscInt j0,rowlen; 1703 rowlen = ai[i+1] - ai[i]; 1704 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1705 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] 
= cdest[aj[ai[i]+j]]; 1706 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1707 } 1708 rowlen = bi[i+1] - bi[i]; 1709 for (j0=j=0; j<rowlen; j0=j) { 1710 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1711 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1712 } 1713 } 1714 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1715 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1716 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1717 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1718 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1719 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1720 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1721 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1722 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1723 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1724 *B = Aperm; 1725 PetscFunctionReturn(0); 1726 } 1727 1728 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1729 { 1730 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1731 PetscErrorCode ierr; 1732 1733 PetscFunctionBegin; 1734 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1735 if (ghosts) *ghosts = aij->garray; 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1740 { 1741 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1742 Mat A = mat->A,B = mat->B; 1743 PetscErrorCode ierr; 1744 PetscLogDouble isend[5],irecv[5]; 1745 1746 PetscFunctionBegin; 1747 info->block_size = 1.0; 1748 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1749 1750 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1751 isend[3] = info->memory; isend[4] = info->mallocs; 1752 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 1755 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1756 isend[3] += info->memory; isend[4] += info->mallocs; 1757 if (flag == MAT_LOCAL) { 1758 info->nz_used = isend[0]; 1759 info->nz_allocated = isend[1]; 1760 info->nz_unneeded = isend[2]; 1761 info->memory = isend[3]; 1762 info->mallocs = isend[4]; 1763 } else if (flag == MAT_GLOBAL_MAX) { 1764 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1765 1766 info->nz_used = irecv[0]; 1767 info->nz_allocated = irecv[1]; 1768 info->nz_unneeded = irecv[2]; 1769 info->memory = irecv[3]; 1770 info->mallocs = irecv[4]; 1771 } else if (flag == MAT_GLOBAL_SUM) { 1772 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1773 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } 1780 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1781 info->fill_ratio_needed = 0; 1782 info->factor_mallocs = 0; 1783 PetscFunctionReturn(0); 1784 } 1785 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 
case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 MatCheckPreallocated(A,1); 1801 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1802 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1803 break; 1804 case MAT_ROW_ORIENTED: 1805 MatCheckPreallocated(A,1); 1806 a->roworiented = flg; 1807 1808 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1809 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1810 break; 1811 case MAT_NEW_DIAGONALS: 1812 case MAT_SORTED_FULL: 1813 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1814 break; 1815 case MAT_IGNORE_OFF_PROC_ENTRIES: 1816 a->donotstash = flg; 1817 break; 1818 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1819 case MAT_SPD: 1820 case MAT_SYMMETRIC: 1821 case MAT_STRUCTURALLY_SYMMETRIC: 1822 case MAT_HERMITIAN: 1823 case MAT_SYMMETRY_ETERNAL: 1824 break; 1825 case MAT_SUBMAT_SINGLEIS: 1826 A->submat_singleis = flg; 1827 break; 1828 case MAT_STRUCTURE_ONLY: 1829 /* The option is handled directly by MatSetOption() */ 1830 break; 1831 default: 1832 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1838 { 1839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1840 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1841 PetscErrorCode ierr; 1842 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1843 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1844 PetscInt *cmap,*idx_p; 1845 1846 PetscFunctionBegin; 1847 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1848 mat->getrowactive = PETSC_TRUE; 1849 1850 if (!mat->rowvalues && (idx || v)) { 1851 /* 1852 allocate enough space to hold information from the longest row. 
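         The scratch arrays mat->rowvalues and mat->rowindices are sized by the largest combined
         row length of the diagonal (A) and off-diagonal (B) blocks, allocated once, and reused by
         every later MatGetRow() call on this matrix.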
1853 */ 1854 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1855 PetscInt max = 1,tmp; 1856 for (i=0; i<matin->rmap->n; i++) { 1857 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1858 if (max < tmp) max = tmp; 1859 } 1860 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1861 } 1862 1863 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1864 lrow = row - rstart; 1865 1866 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1867 if (!v) {pvA = 0; pvB = 0;} 1868 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1869 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 nztot = nzA + nzB; 1872 1873 cmap = mat->garray; 1874 if (v || idx) { 1875 if (nztot) { 1876 /* Sort by increasing column numbers, assuming A and B already sorted */ 1877 PetscInt imark = -1; 1878 if (v) { 1879 *v = v_p = mat->rowvalues; 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1882 else break; 1883 } 1884 imark = i; 1885 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1886 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1887 } 1888 if (idx) { 1889 *idx = idx_p = mat->rowindices; 1890 if (imark > -1) { 1891 for (i=0; i<imark; i++) { 1892 idx_p[i] = cmap[cworkB[i]]; 1893 } 1894 } else { 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1897 else break; 1898 } 1899 imark = i; 1900 } 1901 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1902 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 if (idx) *idx = 0; 1906 if (v) *v = 0; 1907 } 1908 } 1909 *nz = nztot; 1910 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1916 { 1917 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1918 1919 PetscFunctionBegin; 1920 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1921 aij->getrowactive = PETSC_FALSE; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1926 { 1927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1928 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1929 PetscErrorCode ierr; 1930 PetscInt i,j,cstart = mat->cmap->rstart; 1931 PetscReal sum = 0.0; 1932 MatScalar *v; 1933 1934 PetscFunctionBegin; 1935 if (aij->size == 1) { 1936 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1937 } else { 1938 if (type == NORM_FROBENIUS) { 1939 v = amat->a; 1940 for (i=0; i<amat->nz; i++) { 1941 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1942 } 1943 v = bmat->a; 1944 for (i=0; i<bmat->nz; i++) { 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 } 1947 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1948 *norm = PetscSqrtReal(*norm); 1949 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1950 } else if (type == NORM_1) { /* max column norm */ 1951 PetscReal *tmp,*tmp2; 1952 PetscInt *jj,*garray = aij->garray; 1953 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1954 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1955 *norm = 0.0; 
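/* NORM_1 is the largest column sum: |a_ij| is accumulated into tmp[] by global column
   (diagonal-block columns are offset by cstart, off-diagonal columns are mapped through garray),
   the per-column partial sums are added across all ranks, and the maximum entry of the reduced
   array becomes the norm. */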
1956 v = amat->a; jj = amat->j; 1957 for (j=0; j<amat->nz; j++) { 1958 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1959 } 1960 v = bmat->a; jj = bmat->j; 1961 for (j=0; j<bmat->nz; j++) { 1962 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1963 } 1964 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1965 for (j=0; j<mat->cmap->N; j++) { 1966 if (tmp2[j] > *norm) *norm = tmp2[j]; 1967 } 1968 ierr = PetscFree(tmp);CHKERRQ(ierr); 1969 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1970 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1971 } else if (type == NORM_INFINITY) { /* max row norm */ 1972 PetscReal ntemp = 0.0; 1973 for (j=0; j<aij->A->rmap->n; j++) { 1974 v = amat->a + amat->i[j]; 1975 sum = 0.0; 1976 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1977 sum += PetscAbsScalar(*v); v++; 1978 } 1979 v = bmat->a + bmat->i[j]; 1980 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1981 sum += PetscAbsScalar(*v); v++; 1982 } 1983 if (sum > ntemp) ntemp = sum; 1984 } 1985 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1986 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1987 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1993 { 1994 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1995 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1996 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1997 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1998 PetscErrorCode ierr; 1999 Mat B,A_diag,*B_diag; 2000 const MatScalar *array; 2001 2002 PetscFunctionBegin; 2003 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2004 ai = Aloc->i; aj = Aloc->j; 2005 bi = Bloc->i; bj = Bloc->j; 2006 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2007 PetscInt *d_nnz,*g_nnz,*o_nnz; 2008 PetscSFNode *oloc; 2009 PETSC_UNUSED PetscSF sf; 2010 2011 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2012 /* compute d_nnz for preallocation */ 2013 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2014 for (i=0; i<ai[ma]; i++) { 2015 d_nnz[aj[i]]++; 2016 } 2017 /* compute local off-diagonal contributions */ 2018 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2019 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2020 /* map those to global */ 2021 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2022 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2023 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2024 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2025 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2026 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2027 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2028 2029 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2030 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2031 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2032 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2033 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2034 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2035 } else { 2036 B = *matout; 2037 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2038 } 2039 2040 b = (Mat_MPIAIJ*)B->data; 2041 A_diag = a->A; 2042 B_diag = &b->A; 2043 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2044 A_diag_ncol = A_diag->cmap->N; 2045 B_diag_ilen = sub_B_diag->ilen; 2046 B_diag_i = sub_B_diag->i; 2047 2048 /* Set ilen for diagonal of B */ 2049 for (i=0; i<A_diag_ncol; i++) { 2050 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2051 } 2052 2053 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2054 very quickly (=without using MatSetValues), because all writes are local. */ 2055 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2056 2057 /* copy over the B part */ 2058 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2059 array = Bloc->a; 2060 row = A->rmap->rstart; 2061 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2062 cols_tmp = cols; 2063 for (i=0; i<mb; i++) { 2064 ncol = bi[i+1]-bi[i]; 2065 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2066 row++; 2067 array += ncol; cols_tmp += ncol; 2068 } 2069 ierr = PetscFree(cols);CHKERRQ(ierr); 2070 2071 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2074 *matout = B; 2075 } else { 2076 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 2081 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2082 { 2083 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2084 Mat a = aij->A,b = aij->B; 2085 PetscErrorCode ierr; 2086 PetscInt s1,s2,s3; 2087 2088 PetscFunctionBegin; 2089 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2090 if (rr) { 2091 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2092 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2093 /* Overlap communication with computation. 
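     The forward scatter of rr into aij->lvec is started below and completed only after the left
     scaling of the off-diagonal block and the scaling of the diagonal block have been done; the
     off-diagonal block is then right-scaled with the freshly scattered lvec.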
*/ 2094 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2095 } 2096 if (ll) { 2097 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2098 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2099 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2100 } 2101 /* scale the diagonal block */ 2102 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2103 2104 if (rr) { 2105 /* Do a scatter end and then right scale the off-diagonal block */ 2106 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2107 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2113 { 2114 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2115 PetscErrorCode ierr; 2116 2117 PetscFunctionBegin; 2118 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2123 { 2124 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2125 Mat a,b,c,d; 2126 PetscBool flg; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 a = matA->A; b = matA->B; 2131 c = matB->A; d = matB->B; 2132 2133 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2134 if (flg) { 2135 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2136 } 2137 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2142 { 2143 PetscErrorCode ierr; 2144 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2145 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2146 2147 PetscFunctionBegin; 2148 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2149 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2150 /* because of the column compression in the off-processor part of the matrix a->B, 2151 the number of columns in a->B and b->B may be different, hence we cannot call 2152 the MatCopy() directly on the two parts. If need be, we can provide a more 2153 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2154 then copying the submatrices */ 2155 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2156 } else { 2157 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2158 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2159 } 2160 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2165 { 2166 PetscErrorCode ierr; 2167 2168 PetscFunctionBegin; 2169 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 /* 2174 Computes the number of nonzeros per row needed for preallocation when X and Y 2175 have different nonzero structure. 
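  A small, purely hypothetical example: if a given row of X has global columns {0,3,7} and the
  same row of Y has global columns {3,5}, the merged row has columns {0,3,5,7}, so the count for
  that row is 4; a column present in both matrices is counted only once. The comparison goes
  through the local-to-global maps xltog and yltog, so the compressed off-diagonal blocks can be
  passed directly together with their garray maps.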
2176 */ 2177 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2178 { 2179 PetscInt i,j,k,nzx,nzy; 2180 2181 PetscFunctionBegin; 2182 /* Set the number of nonzeros in the new matrix */ 2183 for (i=0; i<m; i++) { 2184 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2185 nzx = xi[i+1] - xi[i]; 2186 nzy = yi[i+1] - yi[i]; 2187 nnz[i] = 0; 2188 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2189 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2190 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2191 nnz[i]++; 2192 } 2193 for (; k<nzy; k++) nnz[i]++; 2194 } 2195 PetscFunctionReturn(0); 2196 } 2197 2198 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2199 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2200 { 2201 PetscErrorCode ierr; 2202 PetscInt m = Y->rmap->N; 2203 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2204 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2205 2206 PetscFunctionBegin; 2207 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2212 { 2213 PetscErrorCode ierr; 2214 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2215 PetscBLASInt bnz,one=1; 2216 Mat_SeqAIJ *x,*y; 2217 2218 PetscFunctionBegin; 2219 if (str == SAME_NONZERO_PATTERN) { 2220 PetscScalar alpha = a; 2221 x = (Mat_SeqAIJ*)xx->A->data; 2222 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2223 y = (Mat_SeqAIJ*)yy->A->data; 2224 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2225 x = (Mat_SeqAIJ*)xx->B->data; 2226 y = (Mat_SeqAIJ*)yy->B->data; 2227 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2228 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2229 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2230 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2231 will be updated */ 2232 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2233 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2234 Y->offloadmask = PETSC_OFFLOAD_CPU; 2235 } 2236 #endif 2237 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2238 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2239 } else { 2240 Mat B; 2241 PetscInt *nnz_d,*nnz_o; 2242 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2243 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2244 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2245 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2246 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2247 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2248 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2250 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2251 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2252 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2253 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2254 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
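/* Y now holds the freshly preallocated sum installed by MatHeaderReplace(); the temporary
   per-row counts nnz_d and nnz_o are no longer needed. */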
2255 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256 }
2257 PetscFunctionReturn(0);
2258 }
2259
2260 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2261
2262 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2263 {
2264 #if defined(PETSC_USE_COMPLEX)
2265 PetscErrorCode ierr;
2266 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2267
2268 PetscFunctionBegin;
2269 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2270 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2271 #else
2272 PetscFunctionBegin;
2273 #endif
2274 PetscFunctionReturn(0);
2275 }
2276
2277 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2278 {
2279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2280 PetscErrorCode ierr;
2281
2282 PetscFunctionBegin;
2283 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2284 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2285 PetscFunctionReturn(0);
2286 }
2287
2288 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2289 {
2290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2291 PetscErrorCode ierr;
2292
2293 PetscFunctionBegin;
2294 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2295 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2296 PetscFunctionReturn(0);
2297 }
2298
2299 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2300 {
2301 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2302 PetscErrorCode ierr;
2303 PetscInt i,*idxb = 0;
2304 PetscScalar *va,*vb;
2305 Vec vtmp;
2306
2307 PetscFunctionBegin;
2308 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2309 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2310 if (idx) {
2311 for (i=0; i<A->rmap->n; i++) {
2312 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2313 }
2314 }
2315
2316 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2317 if (idx) {
2318 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2319 }
2320 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2321 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2322
2323 for (i=0; i<A->rmap->n; i++) {
2324 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2325 va[i] = vb[i];
2326 if (idx) idx[i] = a->garray[idxb[i]];
2327 }
2328 }
2329
2330 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2331 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2332 ierr = PetscFree(idxb);CHKERRQ(ierr);
2333 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2334 PetscFunctionReturn(0);
2335 }
2336
2337 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338 {
2339 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2340 PetscErrorCode ierr;
2341 PetscInt i,*idxb = 0;
2342 PetscScalar *va,*vb;
2343 Vec vtmp;
2344
2345 PetscFunctionBegin;
2346 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2347 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2348 if (idx) {
2349 for (i=0; i<A->rmap->n; i++) {
2350 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2351 }
2352 }
2353
2354 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2355 if (idx) {
2356 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2357 }
2358 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2359 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2360
2361 for (i=0; i<A->rmap->n; i++) {
2362 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2363 va[i] = vb[i];
2364 if (idx) idx[i] = a->garray[idxb[i]];
2365 }
2366 }
2367
2368 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2369 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2370 ierr = PetscFree(idxb);CHKERRQ(ierr);
2371 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2372 PetscFunctionReturn(0);
2373 }
2374
2375 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2376 {
2377 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data;
2378 PetscInt n = A->rmap->n;
2379 PetscInt cstart = A->cmap->rstart;
2380 PetscInt *cmap = mat->garray;
2381 PetscInt *diagIdx, *offdiagIdx;
2382 Vec diagV, offdiagV;
2383 PetscScalar *a, *diagA, *offdiagA;
2384 PetscInt r;
2385 PetscErrorCode ierr;
2386
2387 PetscFunctionBegin;
2388 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2389 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2390 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2391 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2392 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2393 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2394 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2395 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2396 for (r = 0; r < n; ++r) {
2397 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2398 a[r] = diagA[r];
2399 idx[r] = cstart + diagIdx[r];
2400 } else {
2401 a[r] = offdiagA[r];
2402 idx[r] = cmap[offdiagIdx[r]];
2403 }
2404 }
2405 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2406 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2407 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2408 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2409 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2410 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2411 PetscFunctionReturn(0);
2412 }
2413
2414 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2415 {
2416 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2417 PetscInt n = A->rmap->n;
2418 PetscInt cstart = A->cmap->rstart;
2419 PetscInt *cmap = mat->garray;
2420 PetscInt *diagIdx, *offdiagIdx;
2421 Vec diagV, offdiagV;
2422 PetscScalar *a, *diagA, *offdiagA;
2423 PetscInt r;
2424 PetscErrorCode ierr;
2425
2426 PetscFunctionBegin;
2427 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2428 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2429 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2430 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2431 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2432 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2433 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2434 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2435 for (r = 0; r < n; ++r) {
2436 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2437 a[r] = diagA[r];
2438 idx[r] = cstart + diagIdx[r];
2439 } else {
2440 a[r] = offdiagA[r];
2441 idx[r] = cmap[offdiagIdx[r]];
2442 }
2443 }
2444 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2445 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2446 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2447 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2448 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2449 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2450 PetscFunctionReturn(0);
2451 }
2452
2453 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2454 {
2455 PetscErrorCode ierr;
2456 Mat *dummy;
2457
2458 PetscFunctionBegin;
2459 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2460 *newmat = *dummy;
2461 ierr = PetscFree(dummy);CHKERRQ(ierr);
2462 PetscFunctionReturn(0);
2463 }
2464
2465 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2466 {
2467 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2468 PetscErrorCode ierr;
2469
2470 PetscFunctionBegin;
2471 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2472 A->factorerrortype = a->A->factorerrortype; 2473 PetscFunctionReturn(0); 2474 } 2475 2476 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2477 { 2478 PetscErrorCode ierr; 2479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2480 2481 PetscFunctionBegin; 2482 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2483 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2484 if (x->assembled) { 2485 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2486 } else { 2487 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2488 } 2489 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2495 { 2496 PetscFunctionBegin; 2497 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2498 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2499 PetscFunctionReturn(0); 2500 } 2501 2502 /*@ 2503 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2504 2505 Collective on Mat 2506 2507 Input Parameters: 2508 + A - the matrix 2509 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2510 2511 Level: advanced 2512 2513 @*/ 2514 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2524 { 2525 PetscErrorCode ierr; 2526 PetscBool sc = PETSC_FALSE,flg; 2527 2528 PetscFunctionBegin; 2529 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2530 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2531 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2532 if (flg) { 2533 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2534 } 2535 ierr = PetscOptionsTail();CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2540 { 2541 PetscErrorCode ierr; 2542 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2543 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2544 2545 PetscFunctionBegin; 2546 if (!Y->preallocated) { 2547 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2548 } else if (!aij->nz) { 2549 PetscInt nonew = aij->nonew; 2550 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2551 aij->nonew = nonew; 2552 } 2553 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2558 { 2559 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2564 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2565 if (d) { 2566 PetscInt rstart; 
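/* the sequential MatMissingDiagonal() on the diagonal block reports a local row index, so shift it
   by the start of this process's row ownership range to return a global row in *d */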
2567 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2568 *d += rstart; 2569 2570 } 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2575 { 2576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /* -------------------------------------------------------------------*/ 2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2586 MatGetRow_MPIAIJ, 2587 MatRestoreRow_MPIAIJ, 2588 MatMult_MPIAIJ, 2589 /* 4*/ MatMultAdd_MPIAIJ, 2590 MatMultTranspose_MPIAIJ, 2591 MatMultTransposeAdd_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*29*/ MatSetUp_MPIAIJ, 2615 0, 2616 0, 2617 MatGetDiagonalBlock_MPIAIJ, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatCreateSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 MatShift_MPIAIJ, 2632 MatDiagonalSet_MPIAIJ, 2633 MatZeroRowsColumns_MPIAIJ, 2634 /*49*/ MatSetRandom_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIXAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatCreateSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2649 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2650 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 MatSetFromOptions_MPIAIJ, 2662 0, 2663 0, 2664 MatFindZeroDiagonals_MPIAIJ, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 MatIsSymmetric_MPIAIJ, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_MPIAIJ_MPIAIJ, 2678 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 MatBindToCPU_MPIAIJ, 2684 /*99*/ 0, 2685 0, 2686 0, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 0, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 MatMissingDiagonal_MPIAIJ, 2699 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2700 0, 2701 MatGetGhosts_MPIAIJ, 2702 0, 2703 0, 2704 /*119*/0, 2705 0, 2706 0, 2707 0, 2708 MatGetMultiProcBlock_MPIAIJ, 2709 /*124*/MatFindNonzeroRows_MPIAIJ, 2710 MatGetColumnNorms_MPIAIJ, 2711 MatInvertBlockDiagonal_MPIAIJ, 2712 MatInvertVariableBlockDiagonal_MPIAIJ, 2713 MatCreateSubMatricesMPI_MPIAIJ, 2714 /*129*/0, 2715 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2718 0, 2719 /*134*/0, 2720 0, 2721 MatRARt_MPIAIJ_MPIAIJ, 2722 0, 2723 0, 2724 
/*139*/MatSetBlockSizes_MPIAIJ, 2725 0, 2726 0, 2727 MatFDColoringSetUp_MPIXAIJ, 2728 MatFindOffBlockDiagonalEntries_MPIAIJ, 2729 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2735 { 2736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2741 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2746 { 2747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2752 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2757 { 2758 Mat_MPIAIJ *b; 2759 PetscErrorCode ierr; 2760 PetscMPIInt size; 2761 2762 PetscFunctionBegin; 2763 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2765 b = (Mat_MPIAIJ*)B->data; 2766 2767 #if defined(PETSC_USE_CTABLE) 2768 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2769 #else 2770 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2771 #endif 2772 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2773 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2774 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2775 2776 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2777 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2778 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2782 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2784 2785 if (!B->preallocated) { 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2788 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2791 } 2792 2793 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2794 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2795 B->preallocated = PETSC_TRUE; 2796 B->was_assembled = PETSC_FALSE; 2797 B->assembled = PETSC_FALSE; 2798 PetscFunctionReturn(0); 2799 } 2800 2801 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2802 { 2803 Mat_MPIAIJ *b; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2808 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2809 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2810 b = (Mat_MPIAIJ*)B->data; 2811 2812 #if defined(PETSC_USE_CTABLE) 2813 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2814 #else 2815 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2816 #endif 2817 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2818 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2819 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2820 2821 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2822 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2823 B->preallocated = PETSC_TRUE; 2824 B->was_assembled = PETSC_FALSE; 2825 B->assembled = PETSC_FALSE; 2826 PetscFunctionReturn(0); 2827 } 2828 2829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2830 { 2831 Mat mat; 2832 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2833 PetscErrorCode ierr; 2834 2835 PetscFunctionBegin; 2836 *newmat = 0; 2837 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2838 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2839 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2840 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2841 a = (Mat_MPIAIJ*)mat->data; 2842 2843 mat->factortype = matin->factortype; 2844 mat->assembled = matin->assembled; 2845 mat->insertmode = NOT_SET_VALUES; 2846 mat->preallocated = matin->preallocated; 2847 2848 a->size = oldmat->size; 2849 a->rank = oldmat->rank; 2850 a->donotstash = oldmat->donotstash; 2851 a->roworiented = oldmat->roworiented; 2852 a->rowindices = NULL; 2853 a->rowvalues = NULL; 2854 a->getrowactive = PETSC_FALSE; 2855 2856 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2857 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2858 2859 if (oldmat->colmap) { 2860 #if defined(PETSC_USE_CTABLE) 2861 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2862 #else 2863 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2864 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2865 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2866 #endif 2867 } else a->colmap = NULL; 2868 if (oldmat->garray) { 2869 PetscInt len; 2870 len = oldmat->B->cmap->n; 2871 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2872 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2873 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2874 } else a->garray = NULL; 2875 2876 /* It may happen MatDuplicate is called with a non-assembled matrix 2877 In fact, MatDuplicate only requires the matrix to be preallocated 2878 This may happen inside a DMCreateMatrix_Shell */ 2879 if (oldmat->lvec) { 2880 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2882 } 2883 if (oldmat->Mvctx) { 2884 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2885 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2886 } 2887 if (oldmat->Mvctx_mpi1) { 2888 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2890 } 2891 2892 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2893 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2894 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2895 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2896 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2897 *newmat = mat; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2902 { 2903 PetscBool isbinary, ishdf5; 2904 PetscErrorCode ierr; 2905 2906 PetscFunctionBegin; 2907 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2908 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2909 /* force binary viewer to load .info file if it has not yet done so */ 2910 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2911 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2912 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2913 if (isbinary) { 2914 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2915 } else if (ishdf5) { 2916 #if defined(PETSC_HAVE_HDF5) 2917 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2918 #else 2919 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2920 #endif 2921 } else { 2922 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2923 } 2924 PetscFunctionReturn(0); 2925 } 2926 2927 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2928 { 2929 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2930 PetscInt *rowidxs,*colidxs; 2931 PetscScalar *matvals; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2936 2937 /* read in matrix header */ 2938 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2939 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2940 M = header[1]; N = header[2]; nz = header[3]; 2941 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2942 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2943 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2944 2945 /* set block sizes from the viewer's .info file */ 2946 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2947 /* set global sizes if not set already */ 2948 if (mat->rmap->N < 0) mat->rmap->N = M; 2949 if (mat->cmap->N < 0) mat->cmap->N = N; 2950 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2951 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2952 2953 /* check if the matrix sizes are correct */ 2954 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2955 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2956 2957 /* read in row lengths and build row indices */ 2958 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2959 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2960 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2961 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2962 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2963 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2964 /* read in column indices and matrix values */ 2965 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2966 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2967 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2968 /* store matrix indices and values */ 2969 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2970 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2971 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2972 PetscFunctionReturn(0); 2973 } 2974 2975 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2976 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2977 { 2978 PetscErrorCode ierr; 2979 IS iscol_local; 2980 PetscBool isstride; 2981 PetscMPIInt lisstride=0,gisstride; 2982 2983 PetscFunctionBegin; 2984 /* check if we are grabbing all columns*/ 2985 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2986 2987 if (isstride) { 2988 PetscInt start,len,mstart,mlen; 2989 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2990 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2991 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2992 if (mstart == start && mlen-mstart == len) lisstride = 1; 2993 } 2994 2995 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2996 if (gisstride) { 2997 PetscInt N; 2998 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 2999 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3000 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3001 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3002 } else { 3003 PetscInt cbs; 3004 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3005 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3006 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3007 } 3008 3009 *isseq = iscol_local; 3010 PetscFunctionReturn(0); 3011 } 3012 3013 /* 3014 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3015 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3016 3017 Input Parameters: 3018 mat - matrix 3019 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3020 i.e., mat->rstart <= isrow[i] < mat->rend 3021 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3022 i.e., mat->cstart <= iscol[i] < mat->cend 3023 Output Parameter: 3024 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3025 iscol_o - sequential column index set for retrieving mat->B 3026 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3027 */ 3028 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3029 { 3030 PetscErrorCode ierr; 3031 Vec x,cmap; 3032 const PetscInt *is_idx; 3033 PetscScalar *xarray,*cmaparray; 3034 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3035 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3036 Mat B=a->B; 3037 Vec lvec=a->lvec,lcmap; 3038 PetscInt i,cstart,cend,Bn=B->cmap->N; 3039 MPI_Comm comm; 3040 VecScatter Mvctx=a->Mvctx; 3041 3042 PetscFunctionBegin; 3043 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3044 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3045 3046 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3047 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3048 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3049 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3050 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3051 3052 /* Get start indices */ 3053 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3054 isstart -= ncols; 3055 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3056 3057 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3058 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3059 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3060 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3061 for (i=0; i<ncols; i++) { 3062 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3063 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3064 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3065 } 3066 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3067 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3068 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3069 3070 /* Get iscol_d */ 3071 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3072 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3073 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3074 3075 /* Get isrow_d */ 3076 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3077 rstart = mat->rmap->rstart; 3078 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3079 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3080 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3081 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3082 3083 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3084 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3085 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3086 3087 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3088 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3089 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3090 3091 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3092 3093 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3095 3096 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3097 /* off-process column indices */ 3098 count = 0; 3099 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3100 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3101 3102 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3103 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3104 for (i=0; i<Bn; i++) { 3105 if (PetscRealPart(xarray[i]) > -1.0) { 3106 idx[count] = i; /* local column index in off-diagonal part B */ 3107 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3108 count++; 3109 } 3110 } 3111 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3112 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3113 3114 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3115 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3116 3117 ierr = PetscFree(idx);CHKERRQ(ierr); 3118 *garray = cmap1; 3119 3120 ierr = VecDestroy(&x);CHKERRQ(ierr); 3121 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3122 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3123 PetscFunctionReturn(0); 3124 } 3125 3126 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3127 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3128 { 3129 PetscErrorCode ierr; 3130 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3131 Mat M = NULL; 3132 MPI_Comm comm; 3133 IS iscol_d,isrow_d,iscol_o; 3134 Mat Asub = NULL,Bsub = NULL; 3135 PetscInt n; 3136 3137 PetscFunctionBegin; 3138 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3139 3140 if (call == MAT_REUSE_MATRIX) { 3141 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3142 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3143 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3144 3145 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3146 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3147 3148 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3149 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3150 3151 /* Update diagonal and off-diagonal portions of submat */ 3152 asub = (Mat_MPIAIJ*)(*submat)->data; 3153 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3154 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3155 if (n) { 3156 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3157 } 3158 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 3161 } else { /* call == MAT_INITIAL_MATRIX) */ 3162 const PetscInt *garray; 3163 PetscInt BsubN; 3164 3165 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3166 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3167 3168 /* Create local submatrices Asub and Bsub */ 3169 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3170 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3171 3172 /* Create submatrix M */ 3173 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3174 3175 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3176 asub = (Mat_MPIAIJ*)M->data; 3177 3178 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3179 n = asub->B->cmap->N; 3180 if (BsubN > n) { 3181 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3182 const PetscInt *idx; 3183 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3184 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3185 3186 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3187 j = 0; 3188 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3189 for (i=0; i<n; i++) { 3190 if (j >= BsubN) break; 3191 while (subgarray[i] > garray[j]) j++; 3192 3193 if (subgarray[i] == garray[j]) { 3194 idx_new[i] = idx[j++]; 3195 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3196 } 3197 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3198 3199 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3200 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3201 3202 } else if (BsubN < n) { 3203 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3204 } 3205 3206 ierr = PetscFree(garray);CHKERRQ(ierr); 3207 *submat = M; 3208 3209 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3210 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3211 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3212 3213 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3214 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3215 3216 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3217 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3218 } 3219 PetscFunctionReturn(0); 3220 } 3221 3222 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3223 { 3224 PetscErrorCode ierr; 3225 IS iscol_local=NULL,isrow_d; 3226 PetscInt csize; 3227 PetscInt n,i,j,start,end; 3228 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3229 MPI_Comm comm; 3230 3231 PetscFunctionBegin; 3232 /* If isrow has same processor distribution as mat, 3233 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3234 if (call == MAT_REUSE_MATRIX) { 3235 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3236 if (isrow_d) { 3237 sameRowDist = PETSC_TRUE; 3238 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3239 } else { 3240 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3241 if (iscol_local) { 3242 sameRowDist = PETSC_TRUE; 3243 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3244 } 3245 } 3246 } else { 3247 /* Check if isrow has same processor distribution as mat */ 3248 sameDist[0] 
= PETSC_FALSE; 3249 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3250 if (!n) { 3251 sameDist[0] = PETSC_TRUE; 3252 } else { 3253 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3254 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3255 if (i >= start && j < end) { 3256 sameDist[0] = PETSC_TRUE; 3257 } 3258 } 3259 3260 /* Check if iscol has same processor distribution as mat */ 3261 sameDist[1] = PETSC_FALSE; 3262 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3263 if (!n) { 3264 sameDist[1] = PETSC_TRUE; 3265 } else { 3266 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3267 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3268 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3269 } 3270 3271 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3272 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3273 sameRowDist = tsameDist[0]; 3274 } 3275 3276 if (sameRowDist) { 3277 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3278 /* isrow and iscol have same processor distribution as mat */ 3279 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3280 PetscFunctionReturn(0); 3281 } else { /* sameRowDist */ 3282 /* isrow has same processor distribution as mat */ 3283 if (call == MAT_INITIAL_MATRIX) { 3284 PetscBool sorted; 3285 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3286 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3287 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3288 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3289 3290 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3291 if (sorted) { 3292 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3293 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3294 PetscFunctionReturn(0); 3295 } 3296 } else { /* call == MAT_REUSE_MATRIX */ 3297 IS iscol_sub; 3298 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3299 if (iscol_sub) { 3300 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 } 3304 } 3305 } 3306 3307 /* General case: iscol -> iscol_local which has global size of iscol */ 3308 if (call == MAT_REUSE_MATRIX) { 3309 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3310 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3311 } else { 3312 if (!iscol_local) { 3313 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3314 } 3315 } 3316 3317 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3318 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3319 3320 if (call == MAT_INITIAL_MATRIX) { 3321 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3322 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3323 } 3324 PetscFunctionReturn(0); 3325 } 3326 3327 /*@C 3328 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3329 and "off-diagonal" part of the matrix in CSR format. 3330 3331 Collective 3332 3333 Input Parameters: 3334 + comm - MPI communicator 3335 . 
A - "diagonal" portion of matrix 3336 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3337 - garray - global index of B columns 3338 3339 Output Parameter: 3340 . mat - the matrix, with input A as its local diagonal matrix 3341 Level: advanced 3342 3343 Notes: 3344 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3345 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3346 3347 .seealso: MatCreateMPIAIJWithSplitArrays() 3348 @*/ 3349 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3350 { 3351 PetscErrorCode ierr; 3352 Mat_MPIAIJ *maij; 3353 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3354 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3355 PetscScalar *oa=b->a; 3356 Mat Bnew; 3357 PetscInt m,n,N; 3358 3359 PetscFunctionBegin; 3360 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3361 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3362 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3363 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3364 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3365 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3366 3367 /* Get global columns of mat */ 3368 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3369 3370 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3371 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3372 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3373 maij = (Mat_MPIAIJ*)(*mat)->data; 3374 3375 (*mat)->preallocated = PETSC_TRUE; 3376 3377 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3378 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3379 3380 /* Set A as diagonal portion of *mat */ 3381 maij->A = A; 3382 3383 nz = oi[m]; 3384 for (i=0; i<nz; i++) { 3385 col = oj[i]; 3386 oj[i] = garray[col]; 3387 } 3388 3389 /* Set Bnew as off-diagonal portion of *mat */ 3390 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3391 bnew = (Mat_SeqAIJ*)Bnew->data; 3392 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3393 maij->B = Bnew; 3394 3395 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3396 3397 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3398 b->free_a = PETSC_FALSE; 3399 b->free_ij = PETSC_FALSE; 3400 ierr = MatDestroy(&B);CHKERRQ(ierr); 3401 3402 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3403 bnew->free_a = PETSC_TRUE; 3404 bnew->free_ij = PETSC_TRUE; 3405 3406 /* condense columns of maij->B */ 3407 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3408 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3409 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3410 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3411 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3412 PetscFunctionReturn(0); 3413 } 3414 3415 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3416 
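/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() (illustration only; Asub, Bsub and bgarray are
   placeholder names, not objects created elsewhere in this file). Asub is the sequential "diagonal" block,
   Bsub the sequential "off-diagonal" block with compacted columns, and bgarray maps the columns of Bsub to
   global column indices. Both sequential matrices are taken over by the new parallel matrix and must not
   be used or destroyed by the caller afterwards.

     Mat C;
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,bgarray,&C);CHKERRQ(ierr);
     ... use C ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/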
3417 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3418 { 3419 PetscErrorCode ierr; 3420 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3421 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3422 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3423 Mat M,Msub,B=a->B; 3424 MatScalar *aa; 3425 Mat_SeqAIJ *aij; 3426 PetscInt *garray = a->garray,*colsub,Ncols; 3427 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3428 IS iscol_sub,iscmap; 3429 const PetscInt *is_idx,*cmap; 3430 PetscBool allcolumns=PETSC_FALSE; 3431 MPI_Comm comm; 3432 3433 PetscFunctionBegin; 3434 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3435 3436 if (call == MAT_REUSE_MATRIX) { 3437 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3438 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3439 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3440 3441 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3442 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3443 3444 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3445 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3446 3447 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3448 3449 } else { /* call == MAT_INITIAL_MATRIX) */ 3450 PetscBool flg; 3451 3452 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3453 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3454 3455 /* (1) iscol -> nonscalable iscol_local */ 3456 /* Check for special case: each processor gets entire matrix columns */ 3457 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3458 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3459 if (allcolumns) { 3460 iscol_sub = iscol_local; 3461 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3462 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3463 3464 } else { 3465 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3466 PetscInt *idx,*cmap1,k; 3467 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3468 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3469 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3470 count = 0; 3471 k = 0; 3472 for (i=0; i<Ncols; i++) { 3473 j = is_idx[i]; 3474 if (j >= cstart && j < cend) { 3475 /* diagonal part of mat */ 3476 idx[count] = j; 3477 cmap1[count++] = i; /* column index in submat */ 3478 } else if (Bn) { 3479 /* off-diagonal part of mat */ 3480 if (j == garray[k]) { 3481 idx[count] = j; 3482 cmap1[count++] = i; /* column index in submat */ 3483 } else if (j > garray[k]) { 3484 while (j > garray[k] && k < Bn-1) k++; 3485 if (j == garray[k]) { 3486 idx[count] = j; 3487 cmap1[count++] = i; /* column index in submat */ 3488 } 3489 } 3490 } 3491 } 3492 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3493 3494 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3495 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3496 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3497 3498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3499 } 3500 3501 /* (3) Create sequential Msub */ 3502 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3503 } 3504 3505 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3506 aij = (Mat_SeqAIJ*)(Msub)->data; 3507 ii = aij->i; 3508 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3509 3510 /* 3511 m - number of local rows 3512 Ncols - number of columns (same on all processors) 3513 rstart - first row in new global matrix generated 3514 */ 3515 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3516 3517 if (call == MAT_INITIAL_MATRIX) { 3518 /* (4) Create parallel newmat */ 3519 PetscMPIInt rank,size; 3520 PetscInt csize; 3521 3522 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3524 3525 /* 3526 Determine the number of non-zeros in the diagonal and off-diagonal 3527 portions of the matrix in order to do correct preallocation 3528 */ 3529 3530 /* first get start and end of "diagonal" columns */ 3531 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3532 if (csize == PETSC_DECIDE) { 3533 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3534 if (mglobal == Ncols) { /* square matrix */ 3535 nlocal = m; 3536 } else { 3537 nlocal = Ncols/size + ((Ncols % size) > rank); 3538 } 3539 } else { 3540 nlocal = csize; 3541 } 3542 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3543 rstart = rend - nlocal; 3544 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3545 3546 /* next, compute all the lengths */ 3547 jj = aij->j; 3548 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3549 olens = dlens + m; 3550 for (i=0; i<m; i++) { 3551 jend = ii[i+1] - ii[i]; 3552 olen = 0; 3553 dlen = 0; 3554 for (j=0; j<jend; j++) { 3555 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3556 else dlen++; 3557 jj++; 3558 } 3559 olens[i] = olen; 3560 dlens[i] = dlen; 3561 } 3562 3563 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3564 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3565 3566 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3567 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3568 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3569 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3570 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3571 ierr = PetscFree(dlens);CHKERRQ(ierr); 3572 3573 } else { /* call == MAT_REUSE_MATRIX */ 3574 M = *newmat; 3575 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3576 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3577 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3578 /* 3579 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3580 rather than the slower MatSetValues(). 3581 */ 3582 M->was_assembled = PETSC_TRUE; 3583 M->assembled = PETSC_FALSE; 3584 } 3585 3586 /* (5) Set values of Msub to *newmat */ 3587 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3588 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3589 3590 jj = aij->j; 3591 aa = aij->a; 3592 for (i=0; i<m; i++) { 3593 row = rstart + i; 3594 nz = ii[i+1] - ii[i]; 3595 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3596 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3597 jj += nz; aa += nz; 3598 } 3599 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3600 3601 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3602 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3603 3604 ierr = PetscFree(colsub);CHKERRQ(ierr); 3605 3606 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3607 if (call == MAT_INITIAL_MATRIX) { 3608 *newmat = M; 3609 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3610 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3611 3612 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3613 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3614 3615 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3616 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3617 3618 if (iscol_local) { 3619 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3620 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3621 } 3622 } 3623 PetscFunctionReturn(0); 3624 } 3625 3626 /* 3627 Not great since it makes two copies of the submatrix, first an SeqAIJ 3628 in local and then by concatenating the local matrices the end result. 3629 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3630 3631 Note: This requires a sequential iscol with all indices. 
3632 */ 3633 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3634 { 3635 PetscErrorCode ierr; 3636 PetscMPIInt rank,size; 3637 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3638 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3639 Mat M,Mreuse; 3640 MatScalar *aa,*vwork; 3641 MPI_Comm comm; 3642 Mat_SeqAIJ *aij; 3643 PetscBool colflag,allcolumns=PETSC_FALSE; 3644 3645 PetscFunctionBegin; 3646 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3647 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3648 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3649 3650 /* Check for special case: each processor gets entire matrix columns */ 3651 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3652 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3653 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3654 3655 if (call == MAT_REUSE_MATRIX) { 3656 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3657 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3658 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3659 } else { 3660 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3661 } 3662 3663 /* 3664 m - number of local rows 3665 n - number of columns (same on all processors) 3666 rstart - first row in new global matrix generated 3667 */ 3668 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3669 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3670 if (call == MAT_INITIAL_MATRIX) { 3671 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3672 ii = aij->i; 3673 jj = aij->j; 3674 3675 /* 3676 Determine the number of non-zeros in the diagonal and off-diagonal 3677 portions of the matrix in order to do correct preallocation 3678 */ 3679 3680 /* first get start and end of "diagonal" columns */ 3681 if (csize == PETSC_DECIDE) { 3682 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3683 if (mglobal == n) { /* square matrix */ 3684 nlocal = m; 3685 } else { 3686 nlocal = n/size + ((n % size) > rank); 3687 } 3688 } else { 3689 nlocal = csize; 3690 } 3691 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3692 rstart = rend - nlocal; 3693 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3694 3695 /* next, compute all the lengths */ 3696 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3697 olens = dlens + m; 3698 for (i=0; i<m; i++) { 3699 jend = ii[i+1] - ii[i]; 3700 olen = 0; 3701 dlen = 0; 3702 for (j=0; j<jend; j++) { 3703 if (*jj < rstart || *jj >= rend) olen++; 3704 else dlen++; 3705 jj++; 3706 } 3707 olens[i] = olen; 3708 dlens[i] = dlen; 3709 } 3710 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3711 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3712 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3713 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3714 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3715 ierr = PetscFree(dlens);CHKERRQ(ierr); 3716 } else { 3717 PetscInt ml,nl; 3718 3719 M = *newmat; 3720 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3721 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3722 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3723 /* 3724 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3725 rather than the slower MatSetValues(). 3726 */ 3727 M->was_assembled = PETSC_TRUE; 3728 M->assembled = PETSC_FALSE; 3729 } 3730 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3731 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3732 ii = aij->i; 3733 jj = aij->j; 3734 aa = aij->a; 3735 for (i=0; i<m; i++) { 3736 row = rstart + i; 3737 nz = ii[i+1] - ii[i]; 3738 cwork = jj; jj += nz; 3739 vwork = aa; aa += nz; 3740 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3741 } 3742 3743 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3744 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3745 *newmat = M; 3746 3747 /* save submatrix used in processor for next request */ 3748 if (call == MAT_INITIAL_MATRIX) { 3749 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3750 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3751 } 3752 PetscFunctionReturn(0); 3753 } 3754 3755 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3756 { 3757 PetscInt m,cstart, cend,j,nnz,i,d; 3758 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3759 const PetscInt *JJ; 3760 PetscErrorCode ierr; 3761 PetscBool nooffprocentries; 3762 3763 PetscFunctionBegin; 3764 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3765 3766 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3767 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3768 m = B->rmap->n; 3769 cstart = B->cmap->rstart; 3770 cend = B->cmap->rend; 3771 rstart = B->rmap->rstart; 3772 3773 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3774 3775 #if defined(PETSC_USE_DEBUG) 3776 for (i=0; i<m; i++) { 3777 nnz = Ii[i+1]- Ii[i]; 3778 JJ = J + Ii[i]; 3779 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3780 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3781 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3782 } 3783 #endif 3784 3785 for (i=0; i<m; i++) { 3786 nnz = Ii[i+1]- Ii[i]; 3787 JJ = J + Ii[i]; 3788 nnz_max = PetscMax(nnz_max,nnz); 3789 d = 0; 3790 for (j=0; j<nnz; j++) { 3791 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3792 } 3793 d_nnz[i] = d; 3794 o_nnz[i] = nnz - d; 3795 } 3796 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3797 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3798 3799 for (i=0; i<m; i++) { 3800 ii = i + rstart; 3801 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3802 } 3803 nooffprocentries = B->nooffprocentries; 3804 B->nooffprocentries = PETSC_TRUE; 3805 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3806 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3807 B->nooffprocentries = nooffprocentries; 3808 3809 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3810 PetscFunctionReturn(0); 3811 } 3812 3813 /*@ 3814 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3815 (the default parallel PETSc format). 
3816 3817 Collective 3818 3819 Input Parameters: 3820 + B - the matrix 3821 . i - the indices into j for the start of each local row (starts with zero) 3822 . j - the column indices for each local row (starts with zero) 3823 - v - optional values in the matrix 3824 3825 Level: developer 3826 3827 Notes: 3828 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3829 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3830 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3831 3832 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3833 3834 The format which is used for the sparse matrix input, is equivalent to a 3835 row-major ordering.. i.e for the following matrix, the input data expected is 3836 as shown 3837 3838 $ 1 0 0 3839 $ 2 0 3 P0 3840 $ ------- 3841 $ 4 5 6 P1 3842 $ 3843 $ Process0 [P0]: rows_owned=[0,1] 3844 $ i = {0,1,3} [size = nrow+1 = 2+1] 3845 $ j = {0,0,2} [size = 3] 3846 $ v = {1,2,3} [size = 3] 3847 $ 3848 $ Process1 [P1]: rows_owned=[2] 3849 $ i = {0,3} [size = nrow+1 = 1+1] 3850 $ j = {0,1,2} [size = 3] 3851 $ v = {4,5,6} [size = 3] 3852 3853 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3854 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3855 @*/ 3856 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3857 { 3858 PetscErrorCode ierr; 3859 3860 PetscFunctionBegin; 3861 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3862 PetscFunctionReturn(0); 3863 } 3864 3865 /*@C 3866 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3867 (the default parallel PETSc format). For good matrix assembly performance 3868 the user should preallocate the matrix storage by setting the parameters 3869 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3870 performance can be increased by more than a factor of 50. 3871 3872 Collective 3873 3874 Input Parameters: 3875 + B - the matrix 3876 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3877 (same value is used for all local rows) 3878 . d_nnz - array containing the number of nonzeros in the various rows of the 3879 DIAGONAL portion of the local submatrix (possibly different for each row) 3880 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3881 The size of this array is equal to the number of local rows, i.e 'm'. 3882 For matrices that will be factored, you must leave room for (and set) 3883 the diagonal entry even if it is zero. 3884 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3885 submatrix (same value is used for all local rows). 3886 - o_nnz - array containing the number of nonzeros in the various rows of the 3887 OFF-DIAGONAL portion of the local submatrix (possibly different for 3888 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3889 structure. The size of this array is equal to the number 3890 of local rows, i.e 'm'. 
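   A typical call sequence is sketched below (the concrete preallocation values 5 and 2 are illustrative only):
.vb
      MatCreate(comm,&B);
      MatSetType(B,MATMPIAIJ);
      MatSetSizes(B,m,n,M,N);
      MatMPIAIJSetPreallocation(B,5,NULL,2,NULL);
.ve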
3891 3892 If the *_nnz parameter is given then the *_nz parameter is ignored. 3893 3894 The AIJ format (also called the Yale sparse matrix format or 3895 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3896 storage. The stored row and column indices begin with zero. 3897 See Users-Manual: ch_mat for details.
3898 3899 The parallel matrix is partitioned such that the first m0 rows belong to 3900 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3901 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3902 3903 The DIAGONAL portion of the local submatrix of a processor can be defined 3904 as the submatrix which is obtained by extracting the part corresponding to 3905 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3906 first row that belongs to the processor, r2 is the last row belonging to 3907 this processor, and c1-c2 is the range of indices of the local part of a 3908 vector suitable for applying the matrix to. This is an mxn matrix. In the 3909 common case of a square matrix, the row and column ranges are the same and 3910 the DIAGONAL part is also square. The remaining portion of the local 3911 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3912 3913 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3914 3915 You can call MatGetInfo() to get information on how effective the preallocation was; 3916 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3917 you can also run with the option -info and look for messages with the string 3918 malloc in them to see if additional memory allocation was needed.
3919 3920 Example usage:
3921 3922 Consider the following 8x8 matrix with 34 non-zero values, that is 3923 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3924 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3925 as follows:
3926 3927 .vb 3928 1 2 0 | 0 3 0 | 0 4 3929 Proc0 0 5 6 | 7 0 0 | 8 0 3930 9 0 10 | 11 0 0 | 12 0 3931 ------------------------------------- 3932 13 0 14 | 15 16 17 | 0 0 3933 Proc1 0 18 0 | 19 20 21 | 0 0 3934 0 0 0 | 22 23 0 | 24 0 3935 ------------------------------------- 3936 Proc2 25 26 27 | 0 0 28 | 29 0 3937 30 0 0 | 31 32 33 | 0 34 3938 .ve
3939 3940 This can be represented as a collection of submatrices as:
3941 3942 .vb 3943 A B C 3944 D E F 3945 G H I 3946 .ve
3947 3948 Here the submatrices A,B,C are owned by proc0, D,E,F are 3949 owned by proc1, and G,H,I are owned by proc2.
3950 3951 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3952 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3953 The 'M','N' parameters are 8,8, and have the same values on all procs.
3954 3955 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3956 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3957 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3958 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3959 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3960 matrix and [DF] as another SeqAIJ matrix.
3961 3962 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3963 allocated for every row of the local diagonal submatrix, and o_nz 3964 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3965 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3966 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3967 In this case, the values of d_nz,o_nz are: 3968 .vb 3969 proc0 : d_nz = 2, o_nz = 2 3970 proc1 : d_nz = 3, o_nz = 2 3971 proc2 : d_nz = 1, o_nz = 4 3972 .ve 3973 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3974 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3975 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3976 34 values.
3977 3978 When the d_nnz, o_nnz parameters are specified, the storage is specified 3979 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3980 In the above case the values for d_nnz,o_nnz are: 3981 .vb 3982 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3983 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3984 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3985 .ve 3986 Here the space allocated is the sum of all the above values, i.e., 34, and 3987 hence the preallocation is perfect.
3988 3989 Level: intermediate
3990 3991 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3992 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3993 @*/
3994 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3995 { 3996 PetscErrorCode ierr; 3997 3998 PetscFunctionBegin; 3999 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4000 PetscValidType(B,1); 4001 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4002 PetscFunctionReturn(0); 4003 }
4004 4005 /*@ 4006 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 4007 in standard CSR format.
4008 4009 Collective
4010 4011 Input Parameters: 4012 + comm - MPI communicator 4013 . m - number of local rows (Cannot be PETSC_DECIDE) 4014 . n - This value should be the same as the local size used in creating the 4015 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4016 calculated if N is given) For square matrices n is almost always m. 4017 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4018 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4019 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4020 . j - column indices 4021 - a - matrix values
4022 4023 Output Parameter: 4024 . mat - the matrix
4025 4026 Level: intermediate
4027 4028 Notes: 4029 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4030 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4031 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4032 4033 The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4034 4035 The format used for the sparse matrix input is equivalent to a 4036 row-major ordering,
i.e for the following matrix, the input data expected is 4037 as shown 4038 4039 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4040 4041 $ 1 0 0 4042 $ 2 0 3 P0 4043 $ ------- 4044 $ 4 5 6 P1 4045 $ 4046 $ Process0 [P0]: rows_owned=[0,1] 4047 $ i = {0,1,3} [size = nrow+1 = 2+1] 4048 $ j = {0,0,2} [size = 3] 4049 $ v = {1,2,3} [size = 3] 4050 $ 4051 $ Process1 [P1]: rows_owned=[2] 4052 $ i = {0,3} [size = nrow+1 = 1+1] 4053 $ j = {0,1,2} [size = 3] 4054 $ v = {4,5,6} [size = 3] 4055 4056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4057 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4058 @*/ 4059 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4060 { 4061 PetscErrorCode ierr; 4062 4063 PetscFunctionBegin; 4064 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4065 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4066 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4067 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4068 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4069 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4070 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4071 PetscFunctionReturn(0); 4072 } 4073 4074 /*@ 4075 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4076 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4077 4078 Collective 4079 4080 Input Parameters: 4081 + mat - the matrix 4082 . m - number of local rows (Cannot be PETSC_DECIDE) 4083 . n - This value should be the same as the local size used in creating the 4084 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4085 calculated if N is given) For square matrices n is almost always m. 4086 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4087 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4088 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4089 . 
J - column indices 4090 - v - matrix values 4091 4092 Level: intermediate 4093 4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4095 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4096 @*/ 4097 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4098 { 4099 PetscErrorCode ierr; 4100 PetscInt cstart,nnz,i,j; 4101 PetscInt *ld; 4102 PetscBool nooffprocentries; 4103 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4104 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4105 PetscScalar *ad = Ad->a, *ao = Ao->a; 4106 const PetscInt *Adi = Ad->i; 4107 PetscInt ldi,Iii,md; 4108 4109 PetscFunctionBegin; 4110 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4111 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4112 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4113 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4114 4115 cstart = mat->cmap->rstart; 4116 if (!Aij->ld) { 4117 /* count number of entries below block diagonal */ 4118 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4119 Aij->ld = ld; 4120 for (i=0; i<m; i++) { 4121 nnz = Ii[i+1]- Ii[i]; 4122 j = 0; 4123 while (J[j] < cstart && j < nnz) {j++;} 4124 J += nnz; 4125 ld[i] = j; 4126 } 4127 } else { 4128 ld = Aij->ld; 4129 } 4130 4131 for (i=0; i<m; i++) { 4132 nnz = Ii[i+1]- Ii[i]; 4133 Iii = Ii[i]; 4134 ldi = ld[i]; 4135 md = Adi[i+1]-Adi[i]; 4136 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4137 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4138 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4139 ad += md; 4140 ao += nnz - md; 4141 } 4142 nooffprocentries = mat->nooffprocentries; 4143 mat->nooffprocentries = PETSC_TRUE; 4144 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4145 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4146 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4147 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4148 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4149 mat->nooffprocentries = nooffprocentries; 4150 PetscFunctionReturn(0); 4151 } 4152 4153 /*@C 4154 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4155 (the default parallel PETSc format). For good matrix assembly performance 4156 the user should preallocate the matrix storage by setting the parameters 4157 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4158 performance can be increased by more than a factor of 50. 4159 4160 Collective 4161 4162 Input Parameters: 4163 + comm - MPI communicator 4164 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4165 This value should be the same as the local size used in creating the 4166 y vector for the matrix-vector product y = Ax. 4167 . n - This value should be the same as the local size used in creating the 4168 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4169 calculated if N is given) For square matrices n is almost always m. 4170 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4171 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4172 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4173 (same value is used for all local rows) 4174 . d_nnz - array containing the number of nonzeros in the various rows of the 4175 DIAGONAL portion of the local submatrix (possibly different for each row) 4176 or NULL, if d_nz is used to specify the nonzero structure. 4177 The size of this array is equal to the number of local rows, i.e., 'm'. 4178 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4179 submatrix (same value is used for all local rows). 4180 - o_nnz - array containing the number of nonzeros in the various rows of the 4181 OFF-DIAGONAL portion of the local submatrix (possibly different for 4182 each row) or NULL, if o_nz is used to specify the nonzero 4183 structure. The size of this array is equal to the number 4184 of local rows, i.e., 'm'.
4185 4186 Output Parameter: 4187 . A - the matrix
4188 4189 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4190 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4191 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4192 4193 Notes: 4194 If the *_nnz parameter is given then the *_nz parameter is ignored.
4195 4196 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4197 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4198 storage requirements for this matrix.
4199 4200 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4201 processor then it must be used on all processors that share the object for 4202 that argument.
4203 4204 The user MUST specify either the local or global matrix dimensions 4205 (possibly both).
4206 4207 The parallel matrix is partitioned across processors such that the 4208 first m0 rows belong to process 0, the next m1 rows belong to 4209 process 1, the next m2 rows belong to process 2, etc., where 4210 m0,m1,m2,.. are the input parameter 'm'; i.e., each processor stores 4211 values corresponding to an [m x N] submatrix.
4212 4213 The columns are logically partitioned with the n0 columns belonging 4214 to the 0th partition, the next n1 columns belonging to the next 4215 partition, etc., where n0,n1,n2... are the input parameter 'n'.
4216 4217 The DIAGONAL portion of the local submatrix on any given processor 4218 is the submatrix corresponding to the rows and columns m,n 4219 owned by the given processor; i.e., the diagonal matrix on 4220 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4221 etc. The remaining portion of the local submatrix [m x (N-n)] 4222 constitutes the OFF-DIAGONAL portion. The example below better 4223 illustrates this concept.
4224 4225 For a square global matrix we define each processor's diagonal portion 4226 to be its local rows and the corresponding columns (a square submatrix); 4227 each processor's off-diagonal portion encompasses the remainder of the 4228 local matrix (a rectangular submatrix).
4229 4230 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4231 4232 When calling this routine with a single process communicator, a matrix of 4233 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4234 type of communicator, use the construction mechanism:
4235 4239 $ MatCreate(...,&A); 4240 $ MatSetType(A,MATMPIAIJ); 4241 $ MatSetSizes(A, m,n,M,N); 4242 $ MatMPIAIJSetPreallocation(A,...);
4243 4244 By default, this format uses inodes (identical nodes) when possible. 4245 We search for consecutive rows with the same nonzero structure, thereby 4246 reusing matrix information to achieve increased efficiency.
4247 4248 Options Database Keys: 4249 + -mat_no_inode - Do not use inodes 4250 - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4251 4252 4253 4254 Example usage:
4255 4256 Consider the following 8x8 matrix with 34 non-zero values, that is 4257 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4258 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4259 as follows:
4260 4261 .vb 4262 1 2 0 | 0 3 0 | 0 4 4263 Proc0 0 5 6 | 7 0 0 | 8 0 4264 9 0 10 | 11 0 0 | 12 0 4265 ------------------------------------- 4266 13 0 14 | 15 16 17 | 0 0 4267 Proc1 0 18 0 | 19 20 21 | 0 0 4268 0 0 0 | 22 23 0 | 24 0 4269 ------------------------------------- 4270 Proc2 25 26 27 | 0 0 28 | 29 0 4271 30 0 0 | 31 32 33 | 0 34 4272 .ve
4273 4274 This can be represented as a collection of submatrices as:
4275 4276 .vb 4277 A B C 4278 D E F 4279 G H I 4280 .ve
4281 4282 Here the submatrices A,B,C are owned by proc0, D,E,F are 4283 owned by proc1, and G,H,I are owned by proc2.
4284 4285 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4286 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4287 The 'M','N' parameters are 8,8, and have the same values on all procs.
4288 4289 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4290 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4291 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4292 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4293 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4294 matrix and [DF] as another SeqAIJ matrix.
4295 4296 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4297 allocated for every row of the local diagonal submatrix, and o_nz 4298 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4299 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4300 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4301 In this case, the values of d_nz,o_nz are 4302 .vb 4303 proc0 : d_nz = 2, o_nz = 2 4304 proc1 : d_nz = 3, o_nz = 2 4305 proc2 : d_nz = 1, o_nz = 4 4306 .ve 4307 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4308 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4309 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4310 34 values.
4311 4312 When the d_nnz, o_nnz parameters are specified, the storage is specified 4313 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4314 In the above case the values for d_nnz,o_nnz are 4315 .vb 4316 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4317 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4318 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4319 .ve 4320 Here the space allocated is the sum of all the above values, i.e., 34, and 4321 hence the preallocation is perfect.
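   With the d_nnz/o_nnz values above, the corresponding call on proc0 would look like the following
   sketch (the variable names are illustrative only):
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      Mat      A;

      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve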
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
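
   A minimal sketch of how the outputs can be used (illustrative only; error checking omitted):
   the colmap array recovers the global column index of an off-diagonal entry.
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* local column j of Ao, 0 <= j < Nco, corresponds to global column colmap[j] of A */
.ve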
4364 4365 Level: intermediate 4366 4367 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAJ, MATSEQAIJ 4368 @*/ 4369 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4370 { 4371 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4372 PetscBool flg; 4373 PetscErrorCode ierr; 4374 4375 PetscFunctionBegin; 4376 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4377 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4378 if (Ad) *Ad = a->A; 4379 if (Ao) *Ao = a->B; 4380 if (colmap) *colmap = a->garray; 4381 PetscFunctionReturn(0); 4382 } 4383 4384 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4385 { 4386 PetscErrorCode ierr; 4387 PetscInt m,N,i,rstart,nnz,Ii; 4388 PetscInt *indx; 4389 PetscScalar *values; 4390 4391 PetscFunctionBegin; 4392 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4393 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4394 PetscInt *dnz,*onz,sum,bs,cbs; 4395 4396 if (n == PETSC_DECIDE) { 4397 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4398 } 4399 /* Check sum(n) = N */ 4400 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4401 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4402 4403 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4404 rstart -= m; 4405 4406 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4407 for (i=0; i<m; i++) { 4408 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4409 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4410 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4411 } 4412 4413 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4414 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4415 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4416 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4417 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4418 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4419 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4420 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4421 } 4422 4423 /* numeric phase */ 4424 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4425 for (i=0; i<m; i++) { 4426 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4427 Ii = i + rstart; 4428 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4429 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4430 } 4431 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4432 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4437 { 4438 PetscErrorCode ierr; 4439 PetscMPIInt rank; 4440 PetscInt m,N,i,rstart,nnz; 4441 size_t len; 4442 const PetscInt *indx; 4443 PetscViewer out; 4444 char *name; 4445 Mat B; 4446 const PetscScalar *values; 4447 4448 PetscFunctionBegin; 4449 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4450 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4451 /* Should this be the type of the diagonal block of A? 
*/ 4452 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4453 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4454 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4455 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4456 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4457 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4458 for (i=0; i<m; i++) { 4459 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4460 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4461 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4462 } 4463 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4464 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4465 4466 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4467 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4468 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4469 sprintf(name,"%s.%d",outfile,rank); 4470 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4471 ierr = PetscFree(name);CHKERRQ(ierr); 4472 ierr = MatView(B,out);CHKERRQ(ierr); 4473 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4474 ierr = MatDestroy(&B);CHKERRQ(ierr); 4475 PetscFunctionReturn(0); 4476 } 4477 4478 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4479 { 4480 PetscErrorCode ierr; 4481 Mat_Merge_SeqsToMPI *merge; 4482 PetscContainer container; 4483 4484 PetscFunctionBegin; 4485 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4486 if (container) { 4487 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4500 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4501 ierr = PetscFree(merge);CHKERRQ(ierr); 4502 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4503 } 4504 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4505 PetscFunctionReturn(0); 4506 } 4507 4508 #include <../src/mat/utils/freespace.h> 4509 #include <petscbt.h> 4510 4511 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4512 { 4513 PetscErrorCode ierr; 4514 MPI_Comm comm; 4515 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4516 PetscMPIInt size,rank,taga,*len_s; 4517 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4518 PetscInt proc,m; 4519 PetscInt **buf_ri,**buf_rj; 4520 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4521 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4522 MPI_Request *s_waits,*r_waits; 4523 MPI_Status *status; 4524 MatScalar *aa=a->a; 4525 MatScalar **abuf_r,*ba_i; 4526 Mat_Merge_SeqsToMPI *merge; 4527 PetscContainer container; 4528 4529 PetscFunctionBegin; 4530 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4531 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4532 4533 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4534 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4535 4536 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4537 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4538 4539 bi = merge->bi; 4540 bj = merge->bj; 4541 buf_ri = merge->buf_ri; 4542 buf_rj = merge->buf_rj; 4543 4544 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4545 owners = merge->rowmap->range; 4546 len_s = merge->len_s; 4547 4548 /* send and recv matrix values */ 4549 /*-----------------------------*/ 4550 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4551 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4552 4553 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4554 for (proc=0,k=0; proc<size; proc++) { 4555 if (!len_s[proc]) continue; 4556 i = owners[proc]; 4557 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4558 k++; 4559 } 4560 4561 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4562 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4563 ierr = PetscFree(status);CHKERRQ(ierr); 4564 4565 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4566 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4567 4568 /* insert mat values of mpimat */ 4569 /*----------------------------*/ 4570 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4571 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4572 4573 for (k=0; k<merge->nrecv; k++) { 4574 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4575 nrows = *(buf_ri_k[k]); 4576 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4577 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4578 } 4579 4580 /* set values of ba */ 4581 m = merge->rowmap->n; 4582 for (i=0; i<m; i++) { 4583 arow = owners[rank] + i; 4584 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4585 bnzi = bi[i+1] - bi[i]; 4586 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4587 4588 /* add local non-zero vals of this proc's seqmat into ba */ 4589 anzi = ai[arow+1] - ai[arow]; 4590 aj = a->j + ai[arow]; 4591 aa = a->a + ai[arow]; 4592 nextaj = 0; 4593 for (j=0; nextaj<anzi; j++) { 4594 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4595 ba_i[j] += aa[nextaj++]; 4596 } 4597 } 4598 4599 /* add received vals into ba */ 4600 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4601 /* i-th row */ 4602 if (i == *nextrow[k]) { 4603 anzi = *(nextai[k]+1) - *nextai[k]; 4604 aj = buf_rj[k] + *(nextai[k]); 4605 aa = abuf_r[k] + *(nextai[k]); 4606 nextaj = 0; 4607 for (j=0; nextaj<anzi; j++) { 4608 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4609 ba_i[j] += aa[nextaj++]; 4610 } 4611 } 4612 nextrow[k]++; nextai[k]++; 4613 } 4614 } 4615 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4616 } 4617 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4618 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4619 4620 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4621 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4622 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4623 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4624 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4625 PetscFunctionReturn(0); 4626 } 4627 4628 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4629 { 4630 PetscErrorCode ierr; 4631 Mat B_mpi; 4632 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4633 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4634 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4635 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4636 PetscInt len,proc,*dnz,*onz,bs,cbs; 4637 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4638 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4639 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4640 MPI_Status *status; 4641 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4642 PetscBT lnkbt; 4643 Mat_Merge_SeqsToMPI *merge; 4644 PetscContainer container; 4645 4646 PetscFunctionBegin; 4647 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4648 4649 /* make sure it is a PETSc comm */ 4650 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4651 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4652 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4653 4654 ierr = PetscNew(&merge);CHKERRQ(ierr); 4655 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4656 4657 /* determine row ownership */ 4658 /*---------------------------------------------------------*/ 4659 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4660 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4661 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4662 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4664 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4665 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4666 4667 m = merge->rowmap->n; 4668 owners = merge->rowmap->range; 4669 4670 /* determine the number of messages to send, their lengths */ 4671 /*---------------------------------------------------------*/ 4672 len_s = merge->len_s; 4673 4674 len = 0; /* length of buf_si[] */ 4675 merge->nsend = 0; 4676 for (proc=0; proc<size; proc++) { 4677 len_si[proc] = 0; 4678 if (proc == rank) { 4679 len_s[proc] = 0; 4680 } else { 4681 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4682 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4683 } 4684 if (len_s[proc]) { 4685 merge->nsend++; 4686 nrows = 0; 4687 for (i=owners[proc]; i<owners[proc+1]; i++) { 4688 if (ai[i+1] > ai[i]) nrows++; 4689 } 4690 len_si[proc] = 2*(nrows+1); 4691 len += len_si[proc]; 4692 } 4693 } 4694 4695 /* determine the number and length of messages to receive for ij-structure */ 4696 /*-------------------------------------------------------------------------*/ 4697 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4698 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4699 4700 /* post the Irecv of j-structure */ 4701 /*-------------------------------*/ 4702 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4703 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4704 4705 /* post the Isend of j-structure */ 4706 /*--------------------------------*/ 4707 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4708 4709 for (proc=0, k=0; proc<size; proc++) { 4710 if (!len_s[proc]) continue; 4711 i = owners[proc]; 4712 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4713 k++; 4714 } 4715 4716 /* receives and sends 
of j-structure are complete */ 4717 /*------------------------------------------------*/ 4718 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4719 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4720 4721 /* send and recv i-structure */ 4722 /*---------------------------*/ 4723 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4724 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4725 4726 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4727 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4728 for (proc=0,k=0; proc<size; proc++) { 4729 if (!len_s[proc]) continue; 4730 /* form outgoing message for i-structure: 4731 buf_si[0]: nrows to be sent 4732 [1:nrows]: row index (global) 4733 [nrows+1:2*nrows+1]: i-structure index 4734 */ 4735 /*-------------------------------------------*/ 4736 nrows = len_si[proc]/2 - 1; 4737 buf_si_i = buf_si + nrows+1; 4738 buf_si[0] = nrows; 4739 buf_si_i[0] = 0; 4740 nrows = 0; 4741 for (i=owners[proc]; i<owners[proc+1]; i++) { 4742 anzi = ai[i+1] - ai[i]; 4743 if (anzi) { 4744 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4745 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4746 nrows++; 4747 } 4748 } 4749 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4750 k++; 4751 buf_si += len_si[proc]; 4752 } 4753 4754 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4755 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4756 4757 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4758 for (i=0; i<merge->nrecv; i++) { 4759 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4760 } 4761 4762 ierr = PetscFree(len_si);CHKERRQ(ierr); 4763 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4764 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4765 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4766 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4767 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4768 ierr = PetscFree(status);CHKERRQ(ierr); 4769 4770 /* compute a local seq matrix in each processor */ 4771 /*----------------------------------------------*/ 4772 /* allocate bi array and free space for accumulating nonzero column info */ 4773 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4774 bi[0] = 0; 4775 4776 /* create and initialize a linked list */ 4777 nlnk = N+1; 4778 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4779 4780 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4781 len = ai[owners[rank+1]] - ai[owners[rank]]; 4782 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4783 4784 current_space = free_space; 4785 4786 /* determine symbolic info for each local row */ 4787 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4788 4789 for (k=0; k<merge->nrecv; k++) { 4790 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4791 nrows = *buf_ri_k[k]; 4792 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4793 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4794 } 4795 4796 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4797 len = 0; 4798 for (i=0; i<m; i++) { 4799 bnzi = 0; 4800 /* add local 
non-zero cols of this proc's seqmat into lnk */ 4801 arow = owners[rank] + i; 4802 anzi = ai[arow+1] - ai[arow]; 4803 aj = a->j + ai[arow]; 4804 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4805 bnzi += nlnk; 4806 /* add received col data into lnk */ 4807 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4808 if (i == *nextrow[k]) { /* i-th row */ 4809 anzi = *(nextai[k]+1) - *nextai[k]; 4810 aj = buf_rj[k] + *nextai[k]; 4811 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4812 bnzi += nlnk; 4813 nextrow[k]++; nextai[k]++; 4814 } 4815 } 4816 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4817 4818 /* if free space is not available, make more free space */ 4819 if (current_space->local_remaining<bnzi) { 4820 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4821 nspacedouble++; 4822 } 4823 /* copy data into free space, then initialize lnk */ 4824 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4825 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4826 4827 current_space->array += bnzi; 4828 current_space->local_used += bnzi; 4829 current_space->local_remaining -= bnzi; 4830 4831 bi[i+1] = bi[i] + bnzi; 4832 } 4833 4834 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4835 4836 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4837 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4838 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4839 4840 /* create symbolic parallel matrix B_mpi */ 4841 /*---------------------------------------*/ 4842 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4843 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4844 if (n==PETSC_DECIDE) { 4845 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4846 } else { 4847 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4848 } 4849 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4850 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4851 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4852 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4853 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4854 4855 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4856 B_mpi->assembled = PETSC_FALSE; 4857 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4858 merge->bi = bi; 4859 merge->bj = bj; 4860 merge->buf_ri = buf_ri; 4861 merge->buf_rj = buf_rj; 4862 merge->coi = NULL; 4863 merge->coj = NULL; 4864 merge->owners_co = NULL; 4865 4866 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4867 4868 /* attach the supporting struct to B_mpi for reuse */ 4869 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4870 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4871 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4872 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4873 *mpimat = B_mpi; 4874 4875 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4876 PetscFunctionReturn(0); 4877 } 4878 4879 /*@C 4880 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4881 matrices from each processor 4882 4883 Collective 4884 4885 Input Parameters: 4886 + comm - the communicators the parallel matrix will live on 4887 . seqmat - the input sequential matrices 4888 . 
m - number of local rows (or PETSC_DECIDE) 4889 . n - number of local columns (or PETSC_DECIDE) 4890 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4891 4892 Output Parameter: 4893 . mpimat - the parallel matrix generated 4894 4895 Level: advanced 4896 4897 Notes: 4898 The dimensions of the sequential matrix in each processor MUST be the same. 4899 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4900 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4901 @*/ 4902 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4903 { 4904 PetscErrorCode ierr; 4905 PetscMPIInt size; 4906 4907 PetscFunctionBegin; 4908 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4909 if (size == 1) { 4910 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4911 if (scall == MAT_INITIAL_MATRIX) { 4912 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4913 } else { 4914 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4915 } 4916 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4917 PetscFunctionReturn(0); 4918 } 4919 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4920 if (scall == MAT_INITIAL_MATRIX) { 4921 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4922 } 4923 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4924 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 4928 /*@ 4929 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4930 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4931 with MatGetSize() 4932 4933 Not Collective 4934 4935 Input Parameters: 4936 + A - the matrix 4937 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4938 4939 Output Parameter: 4940 . A_loc - the local sequential matrix generated 4941 4942 Level: developer 4943 4944 Notes: 4945 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4946 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4947 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4948 modify the values of the returned A_loc. 
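
   A short sketch of the create-then-reuse pattern described above (illustrative only; error
   checking omitted):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* local rows of A gathered into one SeqAIJ matrix */
     /* ... the numerical values of A change, its nonzero pattern does not ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* refresh the values of the existing A_loc */
     MatDestroy(&A_loc);
.ve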
4949 4950 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4951 4952 @*/ 4953 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4954 { 4955 PetscErrorCode ierr; 4956 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4957 Mat_SeqAIJ *mat,*a,*b; 4958 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4959 MatScalar *aa,*ba,*cam; 4960 PetscScalar *ca; 4961 PetscMPIInt size; 4962 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4963 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4964 PetscBool match; 4965 4966 PetscFunctionBegin; 4967 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4968 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4969 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4970 if (size == 1) { 4971 if (scall == MAT_INITIAL_MATRIX) { 4972 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4973 *A_loc = mpimat->A; 4974 } else if (scall == MAT_REUSE_MATRIX) { 4975 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4976 } 4977 PetscFunctionReturn(0); 4978 } 4979 4980 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4981 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4982 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4983 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4984 aa = a->a; ba = b->a; 4985 if (scall == MAT_INITIAL_MATRIX) { 4986 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4987 ci[0] = 0; 4988 for (i=0; i<am; i++) { 4989 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4990 } 4991 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4992 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4993 k = 0; 4994 for (i=0; i<am; i++) { 4995 ncols_o = bi[i+1] - bi[i]; 4996 ncols_d = ai[i+1] - ai[i]; 4997 /* off-diagonal portion of A */ 4998 for (jo=0; jo<ncols_o; jo++) { 4999 col = cmap[*bj]; 5000 if (col >= cstart) break; 5001 cj[k] = col; bj++; 5002 ca[k++] = *ba++; 5003 } 5004 /* diagonal portion of A */ 5005 for (j=0; j<ncols_d; j++) { 5006 cj[k] = cstart + *aj++; 5007 ca[k++] = *aa++; 5008 } 5009 /* off-diagonal portion of A */ 5010 for (j=jo; j<ncols_o; j++) { 5011 cj[k] = cmap[*bj++]; 5012 ca[k++] = *ba++; 5013 } 5014 } 5015 /* put together the new matrix */ 5016 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5017 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5018 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5019 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5020 mat->free_a = PETSC_TRUE; 5021 mat->free_ij = PETSC_TRUE; 5022 mat->nonew = 0; 5023 } else if (scall == MAT_REUSE_MATRIX) { 5024 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5025 ci = mat->i; cj = mat->j; cam = mat->a; 5026 for (i=0; i<am; i++) { 5027 /* off-diagonal portion of A */ 5028 ncols_o = bi[i+1] - bi[i]; 5029 for (jo=0; jo<ncols_o; jo++) { 5030 col = cmap[*bj]; 5031 if (col >= cstart) break; 5032 *cam++ = *ba++; bj++; 5033 } 5034 /* diagonal portion of A */ 5035 ncols_d = ai[i+1] - ai[i]; 5036 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5037 /* off-diagonal portion of A */ 5038 for (j=jo; j<ncols_o; j++) { 5039 *cam++ = *ba++; bj++; 5040 } 5041 } 5042 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5043 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5044 PetscFunctionReturn(0); 5045 } 5046 5047 /*@C 5048 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5049 5050 Not Collective 5051 5052 Input Parameters: 5053 + A - the matrix 5054 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5055 - row, col - index sets of rows and columns to extract (or NULL) 5056 5057 Output Parameter: 5058 . A_loc - the local sequential matrix generated 5059 5060 Level: developer 5061 5062 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5063 5064 @*/ 5065 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5066 { 5067 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5068 PetscErrorCode ierr; 5069 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5070 IS isrowa,iscola; 5071 Mat *aloc; 5072 PetscBool match; 5073 5074 PetscFunctionBegin; 5075 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5076 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5077 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5078 if (!row) { 5079 start = A->rmap->rstart; end = A->rmap->rend; 5080 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5081 } else { 5082 isrowa = *row; 5083 } 5084 if (!col) { 5085 start = A->cmap->rstart; 5086 cmap = a->garray; 5087 nzA = a->A->cmap->n; 5088 nzB = a->B->cmap->n; 5089 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5090 ncols = 0; 5091 for (i=0; i<nzB; i++) { 5092 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5093 else break; 5094 } 5095 imark = i; 5096 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5097 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5098 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5099 } else { 5100 iscola = *col; 5101 } 5102 if (scall != MAT_INITIAL_MATRIX) { 5103 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5104 aloc[0] = *A_loc; 5105 } 5106 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5107 if (!col) { /* attach global id of condensed columns */ 5108 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5109 } 5110 *A_loc = aloc[0]; 5111 ierr = PetscFree(aloc);CHKERRQ(ierr); 5112 if (!row) { 5113 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5114 } 5115 if (!col) { 5116 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5117 } 5118 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5119 PetscFunctionReturn(0); 5120 } 5121 5122 /* 5123 * Destroy a mat that may be 
composed with PetscSF communication objects. 5124 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5125 * */ 5126 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5127 { 5128 PetscSF sf,osf; 5129 IS map; 5130 PetscErrorCode ierr; 5131 5132 PetscFunctionBegin; 5133 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5134 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5135 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5136 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5137 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5138 ierr = ISDestroy(&map);CHKERRQ(ierr); 5139 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5140 PetscFunctionReturn(0); 5141 } 5142 5143 /* 5144 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5145 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5146 * on a global size. 5147 * */ 5148 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5149 { 5150 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5151 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5152 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5153 PetscMPIInt owner; 5154 PetscSFNode *iremote,*oiremote; 5155 const PetscInt *lrowindices; 5156 PetscErrorCode ierr; 5157 PetscSF sf,osf; 5158 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5159 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5160 MPI_Comm comm; 5161 ISLocalToGlobalMapping mapping; 5162 5163 PetscFunctionBegin; 5164 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5165 /* plocalsize is the number of roots 5166 * nrows is the number of leaves 5167 * */ 5168 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5169 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5170 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5171 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5172 for (i=0;i<nrows;i++) { 5173 /* Find a remote index and an owner for a row 5174 * The row could be local or remote 5175 * */ 5176 owner = 0; 5177 lidx = 0; 5178 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5179 iremote[i].index = lidx; 5180 iremote[i].rank = owner; 5181 } 5182 /* Create SF to communicate how many nonzero columns for each row */ 5183 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5184 /* SF will figure out the number of nonzero colunms for each row, and their 5185 * offsets 5186 * */ 5187 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5188 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5189 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5190 5191 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5192 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5193 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5194 roffsets[0] = 0; 5195 roffsets[1] = 0; 5196 for (i=0;i<plocalsize;i++) { 5197 /* diag */ 5198 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5199 /* off diag */ 5200 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5201 /* compute offsets so that we relative location for each row */ 5202 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5203 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5204 } 5205 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5206 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5207 /* 'r' 
means root, and 'l' means leaf */ 5208 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5209 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5210 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5211 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5212 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5213 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5214 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5215 dntotalcols = 0; 5216 ontotalcols = 0; 5217 ncol = 0; 5218 for (i=0;i<nrows;i++) { 5219 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5220 ncol = PetscMax(pnnz[i],ncol); 5221 /* diag */ 5222 dntotalcols += nlcols[i*2+0]; 5223 /* off diag */ 5224 ontotalcols += nlcols[i*2+1]; 5225 } 5226 /* We do not need to figure the right number of columns 5227 * since all the calculations will be done by going through the raw data 5228 * */ 5229 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5230 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5231 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5232 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5233 /* diag */ 5234 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5235 /* off diag */ 5236 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5237 /* diag */ 5238 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5239 /* off diag */ 5240 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5241 dntotalcols = 0; 5242 ontotalcols = 0; 5243 ntotalcols = 0; 5244 for (i=0;i<nrows;i++) { 5245 owner = 0; 5246 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5247 /* Set iremote for diag matrix */ 5248 for (j=0;j<nlcols[i*2+0];j++) { 5249 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5250 iremote[dntotalcols].rank = owner; 5251 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5252 ilocal[dntotalcols++] = ntotalcols++; 5253 } 5254 /* off diag */ 5255 for (j=0;j<nlcols[i*2+1];j++) { 5256 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5257 oiremote[ontotalcols].rank = owner; 5258 oilocal[ontotalcols++] = ntotalcols++; 5259 } 5260 } 5261 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5262 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5263 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5264 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5265 /* P serves as roots and P_oth is leaves 5266 * Diag matrix 5267 * */ 5268 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5269 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5270 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5271 5272 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5273 /* Off diag */ 5274 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5275 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5276 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5277 /* We operate on the matrix internal data for saving memory */ 5278 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5279 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5280 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5281 /* Convert to global indices for diag matrix */ 5282 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5283 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5284 /* We want P_oth store global indices */ 5285 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5286 /* Use memory scalable approach */ 5287 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5288 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5289 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5290 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5291 /* Convert back to local indices */ 5292 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5293 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5294 nout = 0; 5295 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5296 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5297 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5298 /* Exchange values */ 5299 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5300 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5301 /* Stop PETSc from shrinking memory */ 5302 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5303 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5304 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5305 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5306 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5307 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5308 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5309 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5310 PetscFunctionReturn(0); 5311 } 5312 5313 /* 5314 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5315 * This supports MPIAIJ and MAIJ 5316 * */ 5317 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5318 { 5319 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5320 Mat_SeqAIJ *p_oth; 5321 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5322 IS rows,map; 5323 PetscHMapI hamp; 5324 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5325 MPI_Comm comm; 5326 PetscSF sf,osf; 5327 PetscBool has; 5328 PetscErrorCode ierr; 5329 5330 PetscFunctionBegin; 5331 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5332 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5333 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5334 * and then create a submatrix (that often is an overlapping matrix) 5335 * */ 5336 if (reuse==MAT_INITIAL_MATRIX) { 5337 /* Use a hash table to figure out unique keys */ 5338 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5339 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5340 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5341 count = 0; 5342 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5343 for (i=0;i<a->B->cmap->n;i++) { 5344 key = a->garray[i]/dof; 5345 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5346 if (!has) { 5347 mapping[i] = count; 5348 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5349 } else { 5350 /* Current 'i' has the same value the previous step */ 5351 mapping[i] = count-1; 5352 } 5353 } 5354 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5355 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5356 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5357 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5358 off = 0; 5359 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5360 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5361 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5362 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5363 /* In case, the matrix was already created but users want to recreate the matrix */ 5364 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5365 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5366 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5367 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5368 } else if (reuse==MAT_REUSE_MATRIX) { 5369 /* If matrix was already created, we simply update values using SF objects 5370 * that as attached to the matrix ealier. 5371 * */ 5372 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5373 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5374 if (!sf || !osf) { 5375 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5376 } 5377 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5378 /* Update values in place */ 5379 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5380 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5381 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5382 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5383 } else { 5384 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5385 } 5386 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5387 PetscFunctionReturn(0); 5388 } 5389 5390 /*@C 5391 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5392 5393 Collective on Mat 5394 5395 Input Parameters: 5396 + A,B - the matrices in mpiaij format 5397 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5398 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5399 5400 Output Parameter: 5401 + rowb, colb - index sets of rows and columns of B to extract 5402 - B_seq - the sequential matrix generated 5403 5404 Level: developer 5405 5406 @*/ 5407 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5408 { 5409 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5410 PetscErrorCode ierr; 5411 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5412 IS isrowb,iscolb; 5413 Mat *bseq=NULL; 5414 5415 PetscFunctionBegin; 5416 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5417 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5418 } 5419 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5420 5421 if (scall == MAT_INITIAL_MATRIX) { 5422 start = A->cmap->rstart; 5423 cmap = a->garray; 5424 nzA = a->A->cmap->n; 5425 nzB = a->B->cmap->n; 5426 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5427 ncols = 0; 5428 for (i=0; i<nzB; i++) { /* row < local row index */ 5429 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5430 else break; 5431 } 5432 imark = i; 5433 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5434 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5435 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5436 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5437 } else { 5438 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5439 isrowb = *rowb; iscolb = *colb; 5440 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5441 bseq[0] = *B_seq; 5442 } 5443 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5444 *B_seq = bseq[0]; 5445 ierr = PetscFree(bseq);CHKERRQ(ierr); 5446 if (!rowb) { 5447 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5448 } else { 5449 *rowb = isrowb; 5450 } 5451 if (!colb) { 5452 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5453 } else { 5454 *colb = iscolb; 5455 } 5456 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5457 PetscFunctionReturn(0); 5458 } 5459 5460 /* 5461 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5462 of the OFF-DIAGONAL portion of local A 5463 5464 Collective on Mat 5465 5466 Input Parameters: 5467 + A,B - the matrices in mpiaij format 5468 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5469 5470 Output Parameter: 5471 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5472 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5473 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5474 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5475 5476 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5477 for this matrix. This is not desirable.. 
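
    Usage sketch (how a caller might drive the two phases; the variable names are illustrative
    and error checking is omitted):
.vb
     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;

     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     /* ... new numerical values enter B ... */
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
.ve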
5478 5479 Level: developer 5480 5481 */ 5482 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5483 { 5484 PetscErrorCode ierr; 5485 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5486 Mat_SeqAIJ *b_oth; 5487 VecScatter ctx; 5488 MPI_Comm comm; 5489 const PetscMPIInt *rprocs,*sprocs; 5490 const PetscInt *srow,*rstarts,*sstarts; 5491 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5492 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5493 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5494 MPI_Request *rwaits = NULL,*swaits = NULL; 5495 MPI_Status rstatus; 5496 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5497 5498 PetscFunctionBegin; 5499 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5500 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5501 5502 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5503 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5504 } 5505 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5506 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5507 5508 if (size == 1) { 5509 startsj_s = NULL; 5510 bufa_ptr = NULL; 5511 *B_oth = NULL; 5512 PetscFunctionReturn(0); 5513 } 5514 5515 ctx = a->Mvctx; 5516 tag = ((PetscObject)ctx)->tag; 5517 5518 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5519 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5520 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5521 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5522 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5523 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5524 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5525 5526 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5527 if (scall == MAT_INITIAL_MATRIX) { 5528 /* i-array */ 5529 /*---------*/ 5530 /* post receives */ 5531 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5532 for (i=0; i<nrecvs; i++) { 5533 rowlen = rvalues + rstarts[i]*rbs; 5534 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5535 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5536 } 5537 5538 /* pack the outgoing message */ 5539 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5540 5541 sstartsj[0] = 0; 5542 rstartsj[0] = 0; 5543 len = 0; /* total length of j or a array to be sent */ 5544 if (nsends) { 5545 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5546 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5547 } 5548 for (i=0; i<nsends; i++) { 5549 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5550 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5551 for (j=0; j<nrows; j++) { 5552 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5553 for (l=0; l<sbs; l++) { 5554 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5555 5556 rowlen[j*sbs+l] = ncols; 5557 5558 len += ncols; 5559 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5560 } 5561 k++; 5562 } 5563 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5564 5565 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5566 } 5567 /* recvs and sends of i-array are completed */ 5568 i = nrecvs; 5569 while (i--) { 5570 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5571 } 5572 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5573 ierr = PetscFree(svalues);CHKERRQ(ierr); 5574 5575 /* allocate buffers for sending j and a arrays */ 5576 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5577 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5578 5579 /* create i-array of B_oth */ 5580 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5581 5582 b_othi[0] = 0; 5583 len = 0; /* total length of j or a array to be received */ 5584 k = 0; 5585 for (i=0; i<nrecvs; i++) { 5586 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5587 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5588 for (j=0; j<nrows; j++) { 5589 b_othi[k+1] = b_othi[k] + rowlen[j]; 5590 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5591 k++; 5592 } 5593 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5594 } 5595 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5596 5597 /* allocate space for j and a arrrays of B_oth */ 5598 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5599 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5600 5601 /* j-array */ 5602 /*---------*/ 5603 /* post receives of j-array */ 5604 for (i=0; i<nrecvs; i++) { 5605 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5606 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5607 } 5608 5609 /* pack the outgoing message j-array */ 5610 if (nsends) k = sstarts[0]; 5611 for (i=0; i<nsends; i++) { 5612 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5613 bufJ = bufj+sstartsj[i]; 5614 for (j=0; j<nrows; j++) { 5615 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5616 for (ll=0; ll<sbs; ll++) { 5617 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5618 for (l=0; l<ncols; l++) { 5619 *bufJ++ = cols[l]; 5620 } 5621 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5622 } 5623 } 5624 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5625 } 5626 5627 /* recvs and sends of j-array are completed */ 5628 i = nrecvs; 5629 while (i--) { 5630 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5631 } 5632 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5633 } else if (scall == MAT_REUSE_MATRIX) { 5634 sstartsj = *startsj_s; 5635 rstartsj = *startsj_r; 5636 bufa = *bufa_ptr; 5637 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5638 b_otha = b_oth->a; 5639 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5640 5641 /* a-array */ 5642 /*---------*/ 5643 /* post receives of a-array */ 5644 for (i=0; i<nrecvs; i++) { 5645 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5646 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5647 } 5648 5649 /* pack the outgoing message a-array */ 5650 if (nsends) k = sstarts[0]; 5651 for (i=0; i<nsends; i++) { 5652 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5653 bufA = bufa+sstartsj[i]; 5654 for (j=0; j<nrows; j++) { 5655 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5656 for (ll=0; ll<sbs; ll++) { 5657 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5658 for (l=0; l<ncols; l++) { 5659 *bufA++ = vals[l]; 5660 } 5661 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5662 } 5663 } 5664 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5665 } 5666 /* recvs and sends of a-array are completed */ 5667 i = nrecvs; 5668 while (i--) { 5669 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5670 } 5671 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5672 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5673 5674 if (scall == MAT_INITIAL_MATRIX) { 5675 /* put together the new matrix */ 5676 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5677 5678 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5679 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5680 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5681 b_oth->free_a = PETSC_TRUE; 5682 b_oth->free_ij = PETSC_TRUE; 5683 b_oth->nonew = 0; 5684 5685 ierr = PetscFree(bufj);CHKERRQ(ierr); 5686 if (!startsj_s || !bufa_ptr) { 5687 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5688 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5689 } else { 5690 *startsj_s = sstartsj; 5691 *startsj_r = rstartsj; 5692 *bufa_ptr = bufa; 5693 } 5694 } 5695 5696 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5697 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5698 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5699 PetscFunctionReturn(0); 5700 } 5701 5702 /*@C 5703 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5704 5705 Not Collective 5706 5707 Input Parameters: 5708 . A - The matrix in mpiaij format 5709 5710 Output Parameter: 5711 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5712 . 

/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

    Input Parameter:
.   A - The matrix in MATMPIAIJ format

    Output Parameters:
+   lvec        - The local vector holding off-process values from the argument to a matrix-vector product
.   colmap      - A map from global column index to local index into lvec
-   multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;

  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;

  PetscFunctionReturn(0);
}
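
/*
   A minimal sketch of how a caller might use MatGetCommunicationStructs() to inspect the
   off-process communication of an assembled MATMPIAIJ matrix A (created elsewhere).  This
   is illustrative only; the colmap declaration depends on whether PETSc was configured
   with PETSC_USE_CTABLE, and the printed count is simply the local size of lvec.

     Vec        lvec;
     VecScatter Mvctx;
     PetscInt   nghost;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecGetLocalSize(lvec,&nghost);CHKERRQ(ierr);  // number of off-process columns needed locally
     ierr = PetscPrintf(PETSC_COMM_SELF,"this rank needs %D off-process values\n",nghost);CHKERRQ(ierr);
     ierr = VecScatterView(Mvctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/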

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (      B       )   =   m (      C           )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
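
/*
   A small sketch, not part of the library, of the usage pattern described in the notes
   above: create an "mpiaij" matrix from the options database and insert one value.  The
   sizes and preallocation counts are arbitrary placeholders.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                 // -mat_type mpiaij selects this class
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatSetValue(A,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/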

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;

  b->size = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
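
/*
   The constructor above registers a number of MatConvert_...() implementations on the
   object.  Callers reach them through the public MatConvert() interface; for example
   (a sketch only, assuming A is an assembled MATMPIAIJ matrix created elsewhere):

     Mat B;
     ierr = MatConvert(A,MATMPIBAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);   // dispatches to MatConvert_MPIAIJ_MPIBAIJ
     ierr = MatDestroy(&B);CHKERRQ(ierr);

   or, to replace A in place,

     ierr = MatConvert(A,MATMPISELL,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
*/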

/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The i and j indices are 0 based.

   See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
   communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
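
/*
   A tiny sketch of the split format described above, for the 2 x 2 matrix [1 2; 3 4]
   distributed over two ranks with one row and one column each.  Column indices in j are
   local to the diagonal block, while those in oj are global; the arrays must stay alive
   until the matrix is destroyed.  This is illustrative only and not part of the library.

     // on rank 0 (owns row 0 and column 0):
     PetscInt    i[]  = {0,1}, j[]  = {0};   PetscScalar a[]  = {1.0};
     PetscInt    oi[] = {0,1}, oj[] = {1};   PetscScalar oa[] = {2.0};
     // on rank 1 the analogous arrays would be
     //   i[] = {0,1}, j[] = {0}, a[] = {4.0}, oi[] = {0,1}, oj[] = {0}, oa[] = {3.0}
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/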

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa   = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B     = aij->B;
    Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba   = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
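
/*
   For reference, the ownership tests in the routine above reduce to the following: an
   entry (im, in) is stored in the local diagonal block when both indices are owned, in
   the local off-diagonal block when only the row is owned, and is stashed for the owning
   process otherwise.  A schematic only, not library code:

     if (im >= rstart && im < rend) {              // row is local
       if (in >= cstart && in < cend) {
         // diagonal block aij->A, local indices (im-rstart, in-cstart)
       } else {
         // off-diagonal block aij->B, column translated through aij->colmap / garray
       }
     } else {
       // row belongs to another process: the value goes into mat->stash
     }
*/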