1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->boundtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = 0; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = 
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
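      /* each remote rank i now holds its slice of row lengths, gmata->ilen[rowners[i]..rowners[i+1]);
         rank 0 already copied its own slice into dlens above */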
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
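    /* The receive buffer gmataa stores each local row contiguously in global column order:
       [entries left of the diagonal block | diagonal-block entries | entries right of the diagonal block].
       ld[i] counts how many entries of local row i lie to the left of the diagonal block, so the copy
       loop below splits every row as: first ld[i] values -> B, next Ad->i[i+1]-Ad->i[i] values -> A,
       remaining values of the row -> B.  For example, a row with global columns {3, rstart, rstart+2, rend+1}
       (with rstart > 3) has ld[i] = 1: one value goes to B, two to A, and one more to B. */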
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; 
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 662 } else { 663 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 664 } 665 } 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 673 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 674 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 675 */ 676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 679 Mat A = aij->A; /* diagonal part of the matrix */ 680 Mat B = aij->B; /* offdiagonal part of the matrix */ 681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 682 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 683 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 684 PetscInt *ailen = a->ilen,*aj = a->j; 685 PetscInt *bilen = b->ilen,*bj = b->j; 686 PetscInt am = aij->A->rmap->n,j; 687 PetscInt diag_so_far = 0,dnz; 688 PetscInt offd_so_far = 0,onz; 689 690 PetscFunctionBegin; 691 /* Iterate over all rows of the matrix */ 692 for (j=0; j<am; j++) { 693 dnz = onz = 0; 694 /* Iterate over all non-zero columns of the current row */ 695 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 696 /* If column is in the diagonal */ 697 if (mat_j[col] >= cstart && mat_j[col] < cend) { 698 aj[diag_so_far++] = mat_j[col] - cstart; 699 dnz++; 700 } else { /* off-diagonal entries */ 701 bj[offd_so_far++] = mat_j[col]; 702 onz++; 703 } 704 } 705 ailen[j] = dnz; 706 bilen[j] = onz; 707 } 708 PetscFunctionReturn(0); 709 } 710 711 /* 712 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 713 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 714 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 715 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 716 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 717 */ 718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat A = aij->A; /* diagonal part of the matrix */ 722 Mat B = aij->B; /* offdiagonal part of the matrix */ 723 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 725 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 726 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 727 PetscInt *ailen = a->ilen,*aj = a->j; 728 PetscInt *bilen = b->ilen,*bj = b->j; 729 PetscInt am = aij->A->rmap->n,j; 730 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 
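  /* Overview: (1) drain the stash of off-process entries received from other ranks by calling
     MatSetValues_MPIAIJ on each contiguous run of rows, (2) assemble the diagonal block aij->A,
     (3) agree globally on whether any rank disassembled its off-diagonal block and, if so,
     disassemble here too, (4) on the first final assembly build the off-diagonal column
     information and communication scatter via MatSetUpMultiply_MPIAIJ, then (5) assemble the
     off-diagonal block aij->B. */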
PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 } 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 852 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 853 if (mat->boundtocpu) { 854 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 855 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 #endif 858 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 859 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 860 861 /* determine if any processor has disassembled, if so we must 862 also disassemble ourself, in order that we may reassemble. */ 863 /* 864 if nonzero structure of submatrix B cannot change then we know that 865 no processor disassembled thus we can skip this stuff 866 */ 867 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 868 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 869 if (mat->was_assembled && !other_disassembled) { 870 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 871 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 872 #endif 873 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 874 } 875 } 876 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 877 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 878 } 879 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 880 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 881 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 882 #endif 883 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 885 886 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 887 888 aij->rowvalues = 0; 889 890 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 891 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 892 893 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 894 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 895 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 896 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 897 } 898 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 899 mat->offloadmask = PETSC_OFFLOAD_BOTH; 900 #endif 901 PetscFunctionReturn(0); 902 } 903 904 PetscErrorCode 
MatZeroEntries_MPIAIJ(Mat A) 905 { 906 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 907 PetscErrorCode ierr; 908 909 PetscFunctionBegin; 910 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 911 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 916 { 917 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 918 PetscObjectState sA, sB; 919 PetscInt *lrows; 920 PetscInt r, len; 921 PetscBool cong, lch, gch; 922 PetscErrorCode ierr; 923 924 PetscFunctionBegin; 925 /* get locally owned rows */ 926 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 927 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 928 /* fix right hand side if needed */ 929 if (x && b) { 930 const PetscScalar *xx; 931 PetscScalar *bb; 932 933 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 934 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 935 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 936 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 937 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 938 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 939 } 940 941 sA = mat->A->nonzerostate; 942 sB = mat->B->nonzerostate; 943 944 if (diag != 0.0 && cong) { 945 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 946 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 948 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 949 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 950 PetscInt nnwA, nnwB; 951 PetscBool nnzA, nnzB; 952 953 nnwA = aijA->nonew; 954 nnwB = aijB->nonew; 955 nnzA = aijA->keepnonzeropattern; 956 nnzB = aijB->keepnonzeropattern; 957 if (!nnzA) { 958 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 959 aijA->nonew = 0; 960 } 961 if (!nnzB) { 962 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 963 aijB->nonew = 0; 964 } 965 /* Must zero here before the next loop */ 966 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 967 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 968 for (r = 0; r < len; ++r) { 969 const PetscInt row = lrows[r] + A->rmap->rstart; 970 if (row >= A->cmap->N) continue; 971 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 972 } 973 aijA->nonew = nnwA; 974 aijB->nonew = nnwB; 975 } else { 976 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 977 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(lrows);CHKERRQ(ierr); 980 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 982 983 /* reduce nonzerostate */ 984 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 985 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 986 if (gch) A->nonzerostate++; 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 991 { 992 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 993 
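  /* Strategy: build a PetscSF from the requested global rows to their owning ranks to obtain the
     locally owned rows to zero; zero rows and columns of the diagonal block with MatZeroRowsColumns
     on l->A; then scatter a 0/1 mask of the zeroed rows into the ghosted vector lmask so that the
     matching columns of the off-diagonal block l->B can be zeroed as well, updating b with the
     known values of x where provided. */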
PetscErrorCode ierr; 994 PetscMPIInt n = A->rmap->n; 995 PetscInt i,j,r,m,len = 0; 996 PetscInt *lrows,*owners = A->rmap->range; 997 PetscMPIInt p = 0; 998 PetscSFNode *rrows; 999 PetscSF sf; 1000 const PetscScalar *xx; 1001 PetscScalar *bb,*mask; 1002 Vec xmask,lmask; 1003 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 1004 const PetscInt *aj, *ii,*ridx; 1005 PetscScalar *aa; 1006 1007 PetscFunctionBegin; 1008 /* Create SF where leaves are input rows and roots are owned rows */ 1009 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1010 for (r = 0; r < n; ++r) lrows[r] = -1; 1011 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1012 for (r = 0; r < N; ++r) { 1013 const PetscInt idx = rows[r]; 1014 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1015 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1016 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1017 } 1018 rrows[r].rank = p; 1019 rrows[r].index = rows[r] - owners[p]; 1020 } 1021 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1022 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1023 /* Collect flags for rows to be zeroed */ 1024 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1025 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1026 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1027 /* Compress and put in row numbers */ 1028 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1029 /* zero diagonal part of matrix */ 1030 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1031 /* handle off diagonal part of matrix */ 1032 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1033 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1034 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1035 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1036 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1037 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1039 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1040 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1041 PetscBool cong; 1042 1043 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1044 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1045 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1048 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1049 } 1050 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1051 /* remove zeroed rows of off diagonal matrix */ 1052 ii = aij->i; 1053 for (i=0; i<len; i++) { 1054 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1055 } 1056 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1057 if (aij->compressedrow.use) { 1058 m = aij->compressedrow.nrows; 1059 ii = aij->compressedrow.i; 1060 ridx = aij->compressedrow.rindex; 1061 for (i=0; i<m; i++) { 1062 n = ii[i+1] - ii[i]; 1063 aj = aij->j + ii[i]; 1064 aa = aij->a + ii[i]; 1065 1066 for (j=0; j<n; j++) { 1067 if (PetscAbsScalar(mask[*aj])) { 1068 if (b) bb[*ridx] 
-= *aa*xx[*aj]; 1069 *aa = 0.0; 1070 } 1071 aa++; 1072 aj++; 1073 } 1074 ridx++; 1075 } 1076 } else { /* do not use compressed row format */ 1077 m = l->B->rmap->n; 1078 for (i=0; i<m; i++) { 1079 n = ii[i+1] - ii[i]; 1080 aj = aij->j + ii[i]; 1081 aa = aij->a + ii[i]; 1082 for (j=0; j<n; j++) { 1083 if (PetscAbsScalar(mask[*aj])) { 1084 if (b) bb[i] -= *aa*xx[*aj]; 1085 *aa = 0.0; 1086 } 1087 aa++; 1088 aj++; 1089 } 1090 } 1091 } 1092 if (x && b) { 1093 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1094 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1095 } 1096 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1097 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1098 ierr = PetscFree(lrows);CHKERRQ(ierr); 1099 1100 /* only change matrix nonzero state if pattern was allowed to be changed */ 1101 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1102 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1103 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1104 } 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt nt; 1113 VecScatter Mvctx = a->Mvctx; 1114 1115 PetscFunctionBegin; 1116 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1117 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1118 1119 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1121 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1122 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1127 { 1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1137 { 1138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 VecScatter Mvctx = a->Mvctx; 1141 1142 PetscFunctionBegin; 1143 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1144 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1146 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1152 { 1153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 /* do nondiagonal part */ 1158 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1159 /* do local part */ 1160 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1161 /* add partial results together */ 1162 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1163 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1168 { 1169 MPI_Comm comm; 1170 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1171 Mat Adia = 
Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1172 IS Me,Notme; 1173 PetscErrorCode ierr; 1174 PetscInt M,N,first,last,*notme,i; 1175 PetscBool lf; 1176 PetscMPIInt size; 1177 1178 PetscFunctionBegin; 1179 /* Easy test: symmetric diagonal block */ 1180 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1181 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1182 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1183 if (!*f) PetscFunctionReturn(0); 1184 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1185 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1186 if (size == 1) PetscFunctionReturn(0); 1187 1188 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1189 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1190 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1191 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1192 for (i=0; i<first; i++) notme[i] = i; 1193 for (i=last; i<M; i++) notme[i-last+first] = i; 1194 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1195 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1196 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1197 Aoff = Aoffs[0]; 1198 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1199 Boff = Boffs[0]; 1200 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1201 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1202 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1203 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1204 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1205 ierr = PetscFree(notme);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1210 { 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1219 { 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 /* do nondiagonal part */ 1225 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1226 /* do local part */ 1227 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1228 /* add partial results together */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 /* 1235 This only works correctly for square matrices where the subblock A->A is the 1236 diagonal block 1237 */ 1238 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1239 { 1240 PetscErrorCode ierr; 1241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1242 1243 PetscFunctionBegin; 1244 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1245 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1246 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1251 { 1252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1257 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 #if defined(PETSC_USE_LOG) 1268 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1269 #endif 1270 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1271 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1272 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1273 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1274 #if defined(PETSC_USE_CTABLE) 1275 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1276 #else 1277 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1278 #endif 1279 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1281 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1282 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1283 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1284 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1285 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1286 1287 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1297 #if defined(PETSC_HAVE_ELEMENTAL) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 const PetscInt *garray = aij->garray; 1316 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1317 PetscInt *rowlens; 1318 PetscInt *colidxs; 1319 PetscScalar *matvals; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1324 1325 M = mat->rmap->N; 1326 N = mat->cmap->N; 1327 m = mat->rmap->n; 1328 rs 
= mat->rmap->rstart; 1329 cs = mat->cmap->rstart; 1330 nz = A->nz + B->nz; 1331 1332 /* write matrix header */ 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = M; header[2] = N; header[3] = nz; 1335 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1337 1338 /* fill in and store row lengths */ 1339 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1340 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1341 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1342 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1343 1344 /* fill in and store column indices */ 1345 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1346 for (cnt=0, i=0; i<m; i++) { 1347 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1348 if (garray[B->j[jb]] > cs) break; 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1352 colidxs[cnt++] = A->j[ja] + cs; 1353 for (; jb<B->i[i+1]; jb++) 1354 colidxs[cnt++] = garray[B->j[jb]]; 1355 } 1356 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1357 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1358 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1359 1360 /* fill in and store nonzero values */ 1361 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1362 for (cnt=0, i=0; i<m; i++) { 1363 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1364 if (garray[B->j[jb]] > cs) break; 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1368 matvals[cnt++] = A->a[ja]; 1369 for (; jb<B->i[i+1]; jb++) 1370 matvals[cnt++] = B->a[jb]; 1371 } 1372 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1373 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1374 ierr = PetscFree(matvals);CHKERRQ(ierr); 1375 1376 /* write block size option to the viewer's .info file */ 1377 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #include <petscdraw.h> 1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1383 { 1384 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1385 PetscErrorCode ierr; 1386 PetscMPIInt rank = aij->rank,size = aij->size; 1387 PetscBool isdraw,iascii,isbinary; 1388 PetscViewer sviewer; 1389 PetscViewerFormat format; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1394 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 if (iascii) { 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1398 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1399 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1400 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1401 for (i=0; i<(PetscInt)size; i++) { 1402 nmax = PetscMax(nmax,nz[i]); 1403 nmin = PetscMin(nmin,nz[i]); 1404 navg += nz[i]; 1405 } 1406 ierr = PetscFree(nz);CHKERRQ(ierr); 1407 navg = navg/size; 1408 ierr = 
PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1412 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1413 MatInfo info; 1414 PetscBool inodes; 1415 1416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1417 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1418 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1420 if (!inodes) { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1425 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1426 } 1427 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1434 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1437 PetscInt inodecount,inodelimit,*inodes; 1438 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1439 if (inodes) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1441 } else { 1442 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1446 PetscFunctionReturn(0); 1447 } 1448 } else if (isbinary) { 1449 if (size == 1) { 1450 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1451 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } else if (iascii && size == 1) { 1457 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1458 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } else if (isdraw) { 1461 PetscDraw draw; 1462 PetscBool isnull; 1463 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1464 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1465 if (isnull) PetscFunctionReturn(0); 1466 } 1467 1468 { /* assemble the entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = 0; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 
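      /* descriptive note: each pass refreshes the ghost values of xx in mat->lvec and folds the
         off-diagonal contribution B*x into bb1, so the sweep that follows only involves the
         local diagonal block mat->A */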
/* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = 
MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 
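/*
   Illustrative usage sketch (not part of this file): a caller can combine the local counts
   assembled by MatGetInfo_MPIAIJ() above across all ranks, for example

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used: %g, allocated: %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);

   which exercises the MPI_SUM reduction branch above; MAT_GLOBAL_MAX takes the MPI_MAX branch instead.
*/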
1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_IGNORE_ZERO_ENTRIES: 1805 MatCheckPreallocated(A,1); 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_USE_INODES: 1810 if (PetscUnlikely(!(A)->preallocated)) { 1811 a->inode_setoption = PETSC_TRUE; /* option will be set in MatMPIAIJSetPreallocation_MPIAIJ() */ 1812 a->inode_use = flg; 1813 } else { 1814 a->inode_setoption = PETSC_FALSE; 1815 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1816 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1817 } 1818 break; 1819 case MAT_ROW_ORIENTED: 1820 MatCheckPreallocated(A,1); 1821 a->roworiented = flg; 1822 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_NEW_DIAGONALS: 1827 case MAT_SORTED_FULL: 1828 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1829 break; 1830 case MAT_IGNORE_OFF_PROC_ENTRIES: 1831 a->donotstash = flg; 1832 break; 1833 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1834 case MAT_SPD: 1835 case MAT_SYMMETRIC: 1836 case MAT_STRUCTURALLY_SYMMETRIC: 1837 case MAT_HERMITIAN: 1838 case MAT_SYMMETRY_ETERNAL: 1839 break; 1840 case MAT_SUBMAT_SINGLEIS: 1841 A->submat_singleis = flg; 1842 break; 1843 case MAT_STRUCTURE_ONLY: 1844 /* The option is handled directly by MatSetOption() */ 1845 break; 1846 default: 1847 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 
1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) max = tmp; 1874 } 1875 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = 0; pvB = 0;} 1883 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1901 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1902 } 1903 if (idx) { 1904 *idx = idx_p = mat->rowindices; 1905 if (imark > -1) { 1906 for (i=0; i<imark; i++) { 1907 idx_p[i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 for (i=0; i<nzB; i++) { 1911 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1912 else break; 1913 } 1914 imark = i; 1915 } 1916 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1917 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1918 } 1919 } else { 1920 if (idx) *idx = 0; 1921 if (v) *v = 0; 1922 } 1923 } 1924 *nz = nztot; 1925 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 1934 PetscFunctionBegin; 1935 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1936 aij->getrowactive = PETSC_FALSE; 1937 PetscFunctionReturn(0); 1938 } 1939 1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1941 { 1942 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1943 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1944 PetscErrorCode ierr; 1945 PetscInt i,j,cstart = mat->cmap->rstart; 1946 PetscReal sum = 0.0; 1947 MatScalar *v; 1948 1949 PetscFunctionBegin; 1950 if (aij->size == 1) { 1951 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1952 } else { 1953 if (type == NORM_FROBENIUS) { 1954 v = amat->a; 1955 for (i=0; i<amat->nz; i++) { 1956 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1957 } 1958 v = bmat->a; 1959 for (i=0; i<bmat->nz; i++) { 1960 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1961 } 1962 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1963 *norm = PetscSqrtReal(*norm); 1964 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1965 } else if (type == NORM_1) { /* max column norm */ 1966 PetscReal *tmp,*tmp2; 1967 PetscInt *jj,*garray = aij->garray; 1968 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1969 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1970 *norm = 0.0; 
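      /* accumulate |a_ij| into per-column sums in tmp[] using global column numbering
         (garray[] maps local off-diagonal columns to global columns), then reduce the sums
         over all ranks and take the largest column sum as the 1-norm */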
1971 v = amat->a; jj = amat->j; 1972 for (j=0; j<amat->nz; j++) { 1973 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1974 } 1975 v = bmat->a; jj = bmat->j; 1976 for (j=0; j<bmat->nz; j++) { 1977 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1978 } 1979 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1980 for (j=0; j<mat->cmap->N; j++) { 1981 if (tmp2[j] > *norm) *norm = tmp2[j]; 1982 } 1983 ierr = PetscFree(tmp);CHKERRQ(ierr); 1984 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1985 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1986 } else if (type == NORM_INFINITY) { /* max row norm */ 1987 PetscReal ntemp = 0.0; 1988 for (j=0; j<aij->A->rmap->n; j++) { 1989 v = amat->a + amat->i[j]; 1990 sum = 0.0; 1991 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 v = bmat->a + bmat->i[j]; 1995 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1996 sum += PetscAbsScalar(*v); v++; 1997 } 1998 if (sum > ntemp) ntemp = sum; 1999 } 2000 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2001 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2002 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2008 { 2009 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2010 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2011 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2012 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2013 PetscErrorCode ierr; 2014 Mat B,A_diag,*B_diag; 2015 const MatScalar *array; 2016 2017 PetscFunctionBegin; 2018 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2019 ai = Aloc->i; aj = Aloc->j; 2020 bi = Bloc->i; bj = Bloc->j; 2021 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2022 PetscInt *d_nnz,*g_nnz,*o_nnz; 2023 PetscSFNode *oloc; 2024 PETSC_UNUSED PetscSF sf; 2025 2026 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2027 /* compute d_nnz for preallocation */ 2028 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2029 for (i=0; i<ai[ma]; i++) { 2030 d_nnz[aj[i]]++; 2031 } 2032 /* compute local off-diagonal contributions */ 2033 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2034 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2035 /* map those to global */ 2036 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2037 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2038 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2039 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2040 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2041 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2042 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2043 2044 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2045 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2046 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2047 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2048 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2049 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2050 } else { 2051 B = *matout; 2052 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2053 } 2054 2055 b = (Mat_MPIAIJ*)B->data; 2056 A_diag = a->A; 2057 B_diag = &b->A; 2058 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2059 A_diag_ncol = A_diag->cmap->N; 2060 B_diag_ilen = sub_B_diag->ilen; 2061 B_diag_i = sub_B_diag->i; 2062 2063 /* Set ilen for diagonal of B */ 2064 for (i=0; i<A_diag_ncol; i++) { 2065 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2066 } 2067 2068 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2069 very quickly (=without using MatSetValues), because all writes are local. */ 2070 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2071 2072 /* copy over the B part */ 2073 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2074 array = Bloc->a; 2075 row = A->rmap->rstart; 2076 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2077 cols_tmp = cols; 2078 for (i=0; i<mb; i++) { 2079 ncol = bi[i+1]-bi[i]; 2080 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2081 row++; 2082 array += ncol; cols_tmp += ncol; 2083 } 2084 ierr = PetscFree(cols);CHKERRQ(ierr); 2085 2086 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2089 *matout = B; 2090 } else { 2091 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2097 { 2098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2099 Mat a = aij->A,b = aij->B; 2100 PetscErrorCode ierr; 2101 PetscInt s1,s2,s3; 2102 2103 PetscFunctionBegin; 2104 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2105 if (rr) { 2106 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2107 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2108 /* Overlap communication with computation. 
*/ 2109 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2110 } 2111 if (ll) { 2112 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2113 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2114 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2115 } 2116 /* scale the diagonal block */ 2117 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2118 2119 if (rr) { 2120 /* Do a scatter end and then right scale the off-diagonal block */ 2121 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2122 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2138 { 2139 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2140 Mat a,b,c,d; 2141 PetscBool flg; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 a = matA->A; b = matA->B; 2146 c = matB->A; d = matB->B; 2147 2148 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2149 if (flg) { 2150 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2151 } 2152 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2153 PetscFunctionReturn(0); 2154 } 2155 2156 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2157 { 2158 PetscErrorCode ierr; 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2160 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2161 2162 PetscFunctionBegin; 2163 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2164 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2165 /* because of the column compression in the off-processor part of the matrix a->B, 2166 the number of columns in a->B and b->B may be different, hence we cannot call 2167 the MatCopy() directly on the two parts. If need be, we can provide a more 2168 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2169 then copying the submatrices */ 2170 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2171 } else { 2172 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2173 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2174 } 2175 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2180 { 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 /* 2189 Computes the number of nonzeros per row needed for preallocation when X and Y 2190 have different nonzero structure. 
2191 */ 2192 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2193 { 2194 PetscInt i,j,k,nzx,nzy; 2195 2196 PetscFunctionBegin; 2197 /* Set the number of nonzeros in the new matrix */ 2198 for (i=0; i<m; i++) { 2199 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2200 nzx = xi[i+1] - xi[i]; 2201 nzy = yi[i+1] - yi[i]; 2202 nnz[i] = 0; 2203 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2204 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2205 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2206 nnz[i]++; 2207 } 2208 for (; k<nzy; k++) nnz[i]++; 2209 } 2210 PetscFunctionReturn(0); 2211 } 2212 2213 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2214 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2215 { 2216 PetscErrorCode ierr; 2217 PetscInt m = Y->rmap->N; 2218 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2219 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2220 2221 PetscFunctionBegin; 2222 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2223 PetscFunctionReturn(0); 2224 } 2225 2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2227 { 2228 PetscErrorCode ierr; 2229 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2230 PetscBLASInt bnz,one=1; 2231 Mat_SeqAIJ *x,*y; 2232 2233 PetscFunctionBegin; 2234 if (str == SAME_NONZERO_PATTERN) { 2235 PetscScalar alpha = a; 2236 x = (Mat_SeqAIJ*)xx->A->data; 2237 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2238 y = (Mat_SeqAIJ*)yy->A->data; 2239 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2240 x = (Mat_SeqAIJ*)xx->B->data; 2241 y = (Mat_SeqAIJ*)yy->B->data; 2242 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2243 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2244 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2245 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2246 will be updated */ 2247 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2248 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2249 Y->offloadmask = PETSC_OFFLOAD_CPU; 2250 } 2251 #endif 2252 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2253 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2254 } else { 2255 Mat B; 2256 PetscInt *nnz_d,*nnz_o; 2257 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2258 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2259 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2260 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2261 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2262 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2263 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2264 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2265 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2266 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2267 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2268 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2269 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
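    /* MatHeaderReplace() above has moved B into Y, so the preallocation counts are no longer needed */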
2270 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2271 } 2272 PetscFunctionReturn(0); 2273 } 2274 2275 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2276 2277 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2278 { 2279 #if defined(PETSC_USE_COMPLEX) 2280 PetscErrorCode ierr; 2281 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2282 2283 PetscFunctionBegin; 2284 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2285 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2286 #else 2287 PetscFunctionBegin; 2288 #endif 2289 PetscFunctionReturn(0); 2290 } 2291 2292 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2293 { 2294 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2299 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2304 { 2305 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2310 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2315 { 2316 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2317 PetscErrorCode ierr; 2318 PetscInt i,*idxb = 0; 2319 PetscScalar *va,*vb; 2320 Vec vtmp; 2321 2322 PetscFunctionBegin; 2323 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2324 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2325 if (idx) { 2326 for (i=0; i<A->rmap->n; i++) { 2327 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2328 } 2329 } 2330 2331 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2332 if (idx) { 2333 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2334 } 2335 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2336 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2337 2338 for (i=0; i<A->rmap->n; i++) { 2339 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2340 va[i] = vb[i]; 2341 if (idx) idx[i] = a->garray[idxb[i]]; 2342 } 2343 } 2344 2345 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2346 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2347 ierr = PetscFree(idxb);CHKERRQ(ierr); 2348 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2349 PetscFunctionReturn(0); 2350 } 2351 2352 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2353 { 2354 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2355 PetscErrorCode ierr; 2356 PetscInt i,*idxb = 0; 2357 PetscScalar *va,*vb; 2358 Vec vtmp; 2359 2360 PetscFunctionBegin; 2361 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2362 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2363 if (idx) { 2364 for (i=0; i<A->rmap->n; i++) { 2365 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2366 } 2367 } 2368 2369 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2370 if (idx) { 2371 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2372 } 2373 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2374 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2375 2376 for (i=0; i<A->rmap->n; i++) { 2377 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2378 va[i] = vb[i]; 2379 if (idx) idx[i] = a->garray[idxb[i]]; 2380 } 2381 } 2382 2383 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2384 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2385 ierr = PetscFree(idxb);CHKERRQ(ierr); 2386 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data; 2393 PetscInt n = A->rmap->n; 2394 PetscInt cstart = A->cmap->rstart; 2395 PetscInt *cmap = mat->garray; 2396 PetscInt *diagIdx, *offdiagIdx; 2397 Vec diagV, offdiagV; 2398 PetscScalar *a, *diagA, *offdiagA; 2399 PetscInt r; 2400 PetscErrorCode ierr; 2401 2402 PetscFunctionBegin; 2403 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2404 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2405 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2406 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2407 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2408 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2409 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2410 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2411 for (r = 0; r < n; ++r) { 2412 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2413 a[r] = diagA[r]; 2414 idx[r] = cstart + diagIdx[r]; 2415 } else { 2416 a[r] = offdiagA[r]; 2417 idx[r] = cmap[offdiagIdx[r]]; 2418 } 2419 } 2420 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2421 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2423 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2424 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2425 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2430 { 2431 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2432 PetscInt n = A->rmap->n; 2433 PetscInt cstart = A->cmap->rstart; 2434 PetscInt *cmap = mat->garray; 2435 PetscInt *diagIdx, *offdiagIdx; 2436 Vec diagV, offdiagV; 2437 PetscScalar *a, *diagA, *offdiagA; 2438 PetscInt r; 2439 PetscErrorCode ierr; 2440 2441 PetscFunctionBegin; 2442 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2443 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2444 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2445 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2446 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2447 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2448 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2449 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2450 for (r = 0; r < n; ++r) { 2451 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2452 a[r] = diagA[r]; 2453 idx[r] = cstart + diagIdx[r]; 2454 } else { 2455 a[r] = offdiagA[r]; 2456 idx[r] = cmap[offdiagIdx[r]]; 2457 } 2458 } 2459 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2460 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2461 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2462 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2463 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2464 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2469 { 2470 PetscErrorCode ierr; 2471 Mat *dummy; 2472 2473 PetscFunctionBegin; 2474 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2475 *newmat = *dummy; 2476 ierr = PetscFree(dummy);CHKERRQ(ierr); 2477 PetscFunctionReturn(0); 2478 } 2479 2480 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2481 { 2482 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2483 PetscErrorCode ierr; 2484 2485 PetscFunctionBegin; 2486 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2487 A->factorerrortype = a->A->factorerrortype; 2488 PetscFunctionReturn(0); 2489 } 2490 2491 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2492 { 2493 PetscErrorCode ierr; 2494 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2495 2496 PetscFunctionBegin; 2497 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2498 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2499 if (x->assembled) { 2500 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2501 } else { 2502 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2503 } 2504 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2505 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2506 PetscFunctionReturn(0); 2507 } 2508 2509 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2510 { 2511 PetscFunctionBegin; 2512 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2513 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2514 PetscFunctionReturn(0); 2515 } 2516 2517 /*@ 2518 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2519 2520 Collective on Mat 2521 2522 Input Parameters: 2523 + A - the matrix 2524 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2525 2526 Level: advanced 2527 2528 @*/ 2529 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2530 { 2531 PetscErrorCode ierr; 2532 2533 PetscFunctionBegin; 2534 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2539 { 2540 PetscErrorCode ierr; 2541 PetscBool sc = PETSC_FALSE,flg; 2542 2543 PetscFunctionBegin; 2544 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2545 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2546 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2547 if (flg) { 2548 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2549 } 2550 ierr = PetscOptionsTail();CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2555 { 2556 PetscErrorCode ierr; 2557 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2558 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2559 2560 PetscFunctionBegin; 2561 if (!Y->preallocated) { 2562 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2563 } else if (!aij->nz) { 2564 PetscInt nonew = aij->nonew; 2565 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2566 aij->nonew = nonew; 2567 } 2568 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2569 PetscFunctionReturn(0); 2570 } 2571 2572 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2573 { 2574 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2579 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2580 if (d) { 2581 PetscInt rstart; 
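    /* the sequential MatMissingDiagonal() reports a local row index; shift it by the ownership
       range start so *d is returned as a global row number */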
2582 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2583 *d += rstart; 2584 2585 } 2586 PetscFunctionReturn(0); 2587 } 2588 2589 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2590 { 2591 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2592 PetscErrorCode ierr; 2593 2594 PetscFunctionBegin; 2595 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2596 PetscFunctionReturn(0); 2597 } 2598 2599 /* -------------------------------------------------------------------*/ 2600 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2601 MatGetRow_MPIAIJ, 2602 MatRestoreRow_MPIAIJ, 2603 MatMult_MPIAIJ, 2604 /* 4*/ MatMultAdd_MPIAIJ, 2605 MatMultTranspose_MPIAIJ, 2606 MatMultTransposeAdd_MPIAIJ, 2607 0, 2608 0, 2609 0, 2610 /*10*/ 0, 2611 0, 2612 0, 2613 MatSOR_MPIAIJ, 2614 MatTranspose_MPIAIJ, 2615 /*15*/ MatGetInfo_MPIAIJ, 2616 MatEqual_MPIAIJ, 2617 MatGetDiagonal_MPIAIJ, 2618 MatDiagonalScale_MPIAIJ, 2619 MatNorm_MPIAIJ, 2620 /*20*/ MatAssemblyBegin_MPIAIJ, 2621 MatAssemblyEnd_MPIAIJ, 2622 MatSetOption_MPIAIJ, 2623 MatZeroEntries_MPIAIJ, 2624 /*24*/ MatZeroRows_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*29*/ MatSetUp_MPIAIJ, 2630 0, 2631 0, 2632 MatGetDiagonalBlock_MPIAIJ, 2633 0, 2634 /*34*/ MatDuplicate_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*39*/ MatAXPY_MPIAIJ, 2640 MatCreateSubMatrices_MPIAIJ, 2641 MatIncreaseOverlap_MPIAIJ, 2642 MatGetValues_MPIAIJ, 2643 MatCopy_MPIAIJ, 2644 /*44*/ MatGetRowMax_MPIAIJ, 2645 MatScale_MPIAIJ, 2646 MatShift_MPIAIJ, 2647 MatDiagonalSet_MPIAIJ, 2648 MatZeroRowsColumns_MPIAIJ, 2649 /*49*/ MatSetRandom_MPIAIJ, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*54*/ MatFDColoringCreate_MPIXAIJ, 2655 0, 2656 MatSetUnfactored_MPIAIJ, 2657 MatPermute_MPIAIJ, 2658 0, 2659 /*59*/ MatCreateSubMatrix_MPIAIJ, 2660 MatDestroy_MPIAIJ, 2661 MatView_MPIAIJ, 2662 0, 2663 0, 2664 /*64*/ 0, 2665 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2666 0, 2667 0, 2668 0, 2669 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2670 MatGetRowMinAbs_MPIAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*75*/ MatFDColoringApply_AIJ, 2676 MatSetFromOptions_MPIAIJ, 2677 0, 2678 0, 2679 MatFindZeroDiagonals_MPIAIJ, 2680 /*80*/ 0, 2681 0, 2682 0, 2683 /*83*/ MatLoad_MPIAIJ, 2684 MatIsSymmetric_MPIAIJ, 2685 0, 2686 0, 2687 0, 2688 0, 2689 /*89*/ 0, 2690 0, 2691 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2692 0, 2693 0, 2694 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2695 0, 2696 0, 2697 0, 2698 MatBindToCPU_MPIAIJ, 2699 /*99*/ MatProductSetFromOptions_MPIAIJ, 2700 0, 2701 0, 2702 MatConjugate_MPIAIJ, 2703 0, 2704 /*104*/MatSetValuesRow_MPIAIJ, 2705 MatRealPart_MPIAIJ, 2706 MatImaginaryPart_MPIAIJ, 2707 0, 2708 0, 2709 /*109*/0, 2710 0, 2711 MatGetRowMin_MPIAIJ, 2712 0, 2713 MatMissingDiagonal_MPIAIJ, 2714 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2715 0, 2716 MatGetGhosts_MPIAIJ, 2717 0, 2718 0, 2719 /*119*/0, 2720 0, 2721 0, 2722 0, 2723 MatGetMultiProcBlock_MPIAIJ, 2724 /*124*/MatFindNonzeroRows_MPIAIJ, 2725 MatGetColumnNorms_MPIAIJ, 2726 MatInvertBlockDiagonal_MPIAIJ, 2727 MatInvertVariableBlockDiagonal_MPIAIJ, 2728 MatCreateSubMatricesMPI_MPIAIJ, 2729 /*129*/0, 2730 0, 2731 0, 2732 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2733 0, 2734 /*134*/0, 2735 0, 2736 0, 2737 0, 2738 0, 2739 /*139*/MatSetBlockSizes_MPIAIJ, 2740 0, 2741 0, 2742 MatFDColoringSetUp_MPIXAIJ, 2743 MatFindOffBlockDiagonalEntries_MPIAIJ, 2744 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2745 /*145*/0, 2746 0, 2747 0 2748 }; 2749 2750 /* 
----------------------------------------------------------------------------------------*/ 2751 2752 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2753 { 2754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2759 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 2763 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2764 { 2765 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2766 PetscErrorCode ierr; 2767 2768 PetscFunctionBegin; 2769 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2770 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2771 PetscFunctionReturn(0); 2772 } 2773 2774 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2775 { 2776 Mat_MPIAIJ *b; 2777 PetscErrorCode ierr; 2778 PetscMPIInt size; 2779 2780 PetscFunctionBegin; 2781 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2782 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2783 b = (Mat_MPIAIJ*)B->data; 2784 2785 #if defined(PETSC_USE_CTABLE) 2786 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2787 #else 2788 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2789 #endif 2790 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2791 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2792 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2793 2794 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2795 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2796 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2797 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2798 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2799 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2800 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2801 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2802 2803 if (!B->preallocated) { 2804 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2805 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2806 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2807 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2808 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2809 } 2810 2811 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2812 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2813 B->preallocated = PETSC_TRUE; 2814 B->was_assembled = PETSC_FALSE; 2815 B->assembled = PETSC_FALSE; 2816 2817 /* Set inode option */ 2818 if (b->inode_setoption) { 2819 ierr = MatSetOption(b->A,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr); 2820 ierr = MatSetOption(b->B,MAT_USE_INODES,b->inode_use);CHKERRQ(ierr); 2821 } 2822 PetscFunctionReturn(0); 2823 } 2824 2825 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2826 { 2827 Mat_MPIAIJ *b; 2828 PetscErrorCode ierr; 2829 2830 PetscFunctionBegin; 2831 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2832 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2834 b = (Mat_MPIAIJ*)B->data; 2835 2836 #if defined(PETSC_USE_CTABLE) 2837 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2838 #else 2839 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2840 #endif 2841 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2842 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2843 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2844 2845 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2846 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2847 B->preallocated = PETSC_TRUE; 2848 B->was_assembled = PETSC_FALSE; 2849 B->assembled = PETSC_FALSE; 2850 PetscFunctionReturn(0); 2851 } 2852 2853 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2854 { 2855 Mat mat; 2856 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2857 PetscErrorCode ierr; 2858 2859 PetscFunctionBegin; 2860 *newmat = 0; 2861 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2862 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2863 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2864 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2865 a = (Mat_MPIAIJ*)mat->data; 2866 2867 mat->factortype = matin->factortype; 2868 mat->assembled = matin->assembled; 2869 mat->insertmode = NOT_SET_VALUES; 2870 mat->preallocated = matin->preallocated; 2871 2872 a->size = oldmat->size; 2873 a->rank = oldmat->rank; 2874 a->donotstash = oldmat->donotstash; 2875 a->roworiented = oldmat->roworiented; 2876 a->rowindices = NULL; 2877 a->rowvalues = NULL; 2878 a->getrowactive = PETSC_FALSE; 2879 2880 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2881 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2882 2883 if (oldmat->colmap) { 2884 #if defined(PETSC_USE_CTABLE) 2885 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2886 #else 2887 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2888 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2889 ierr = 
PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2890 #endif 2891 } else a->colmap = NULL; 2892 if (oldmat->garray) { 2893 PetscInt len; 2894 len = oldmat->B->cmap->n; 2895 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2896 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2897 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2898 } else a->garray = NULL; 2899 2900 /* It may happen MatDuplicate is called with a non-assembled matrix 2901 In fact, MatDuplicate only requires the matrix to be preallocated 2902 This may happen inside a DMCreateMatrix_Shell */ 2903 if (oldmat->lvec) { 2904 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2905 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2906 } 2907 if (oldmat->Mvctx) { 2908 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2909 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2910 } 2911 if (oldmat->Mvctx_mpi1) { 2912 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2913 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2914 } 2915 2916 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2917 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2918 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2919 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2920 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2921 *newmat = mat; 2922 PetscFunctionReturn(0); 2923 } 2924 2925 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2926 { 2927 PetscBool isbinary, ishdf5; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2932 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2933 /* force binary viewer to load .info file if it has not yet done so */ 2934 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2935 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2936 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2937 if (isbinary) { 2938 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2939 } else if (ishdf5) { 2940 #if defined(PETSC_HAVE_HDF5) 2941 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2942 #else 2943 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2944 #endif 2945 } else { 2946 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2947 } 2948 PetscFunctionReturn(0); 2949 } 2950 2951 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2952 { 2953 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2954 PetscInt *rowidxs,*colidxs; 2955 PetscScalar *matvals; 2956 PetscErrorCode ierr; 2957 2958 PetscFunctionBegin; 2959 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2960 2961 /* read in matrix header */ 2962 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2963 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2964 M = header[1]; N = header[2]; nz = header[3]; 2965 if (M < 0) 
SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2966 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 2967 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2968 2969 /* set block sizes from the viewer's .info file */ 2970 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2971 /* set global sizes if not set already */ 2972 if (mat->rmap->N < 0) mat->rmap->N = M; 2973 if (mat->cmap->N < 0) mat->cmap->N = N; 2974 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2975 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2976 2977 /* check if the matrix sizes are correct */ 2978 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2979 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2980 2981 /* read in row lengths and build row indices */ 2982 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2983 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2984 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2985 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2986 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2987 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2988 /* read in column indices and matrix values */ 2989 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2990 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2991 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2992 /* store matrix indices and values */ 2993 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2994 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2995 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2996 PetscFunctionReturn(0); 2997 } 2998 2999 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3000 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3001 { 3002 PetscErrorCode ierr; 3003 IS iscol_local; 3004 PetscBool isstride; 3005 PetscMPIInt lisstride=0,gisstride; 3006 3007 PetscFunctionBegin; 3008 /* check if we are grabbing all columns*/ 3009 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3010 3011 if (isstride) { 3012 PetscInt start,len,mstart,mlen; 3013 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3014 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3015 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3016 if (mstart == start && mlen-mstart == len) lisstride = 1; 3017 } 3018 3019 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3020 if (gisstride) { 3021 PetscInt N; 3022 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3023 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3024 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3025 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3026 } else { 3027 PetscInt cbs; 3028 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3029 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3030 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3031 } 3032 3033 *isseq = iscol_local; 3034 PetscFunctionReturn(0); 3035 } 3036 3037 /* 3038 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3039 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3040 3041 Input Parameters: 3042 mat - matrix 3043 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3044 i.e., mat->rstart <= isrow[i] < mat->rend 3045 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3046 i.e., mat->cstart <= iscol[i] < mat->cend 3047 Output Parameter: 3048 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3049 iscol_o - sequential column index set for retrieving mat->B 3050 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3051 */ 3052 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3053 { 3054 PetscErrorCode ierr; 3055 Vec x,cmap; 3056 const PetscInt *is_idx; 3057 PetscScalar *xarray,*cmaparray; 3058 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3059 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3060 Mat B=a->B; 3061 Vec lvec=a->lvec,lcmap; 3062 PetscInt i,cstart,cend,Bn=B->cmap->N; 3063 MPI_Comm comm; 3064 VecScatter Mvctx=a->Mvctx; 3065 3066 PetscFunctionBegin; 3067 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3068 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3069 3070 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3071 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3072 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3073 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3074 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3075 3076 /* Get start indices */ 3077 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3078 isstart -= ncols; 3079 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3080 3081 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3082 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3083 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3084 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3085 for (i=0; i<ncols; i++) { 3086 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3087 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3088 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3089 } 3090 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3091 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3092 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3093 3094 /* Get iscol_d */ 3095 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3096 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3097 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3098 3099 /* Get isrow_d */ 3100 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3101 rstart = mat->rmap->rstart; 3102 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3103 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3104 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3105 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3106 3107 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3108 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3109 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3110 3111 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3112 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3113 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3114 3115 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3116 3117 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3118 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3119 3120 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3121 /* off-process column indices */ 3122 count = 0; 3123 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3124 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3125 3126 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3127 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3128 for (i=0; i<Bn; i++) { 3129 if (PetscRealPart(xarray[i]) > -1.0) { 3130 idx[count] = i; /* local column index in off-diagonal part B */ 3131 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3132 count++; 3133 } 3134 } 3135 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3136 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3137 3138 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3139 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3140 3141 ierr = PetscFree(idx);CHKERRQ(ierr); 3142 *garray = cmap1; 3143 3144 ierr = VecDestroy(&x);CHKERRQ(ierr); 3145 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3146 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3147 PetscFunctionReturn(0); 3148 } 3149 3150 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3151 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3152 { 3153 PetscErrorCode ierr; 3154 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3155 Mat M = NULL; 3156 MPI_Comm comm; 3157 IS iscol_d,isrow_d,iscol_o; 3158 Mat Asub = NULL,Bsub = NULL; 3159 PetscInt n; 3160 3161 PetscFunctionBegin; 3162 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3163 3164 if (call == MAT_REUSE_MATRIX) { 3165 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3166 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3167 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3168 3169 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3170 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3171 3172 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3173 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3174 3175 /* Update diagonal and off-diagonal portions of submat */ 3176 asub = (Mat_MPIAIJ*)(*submat)->data; 3177 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3178 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3179 if (n) { 3180 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3181 } 3182 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3183 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3184 3185 } else { /* call == MAT_INITIAL_MATRIX) */ 3186 const PetscInt *garray; 3187 PetscInt BsubN; 3188 3189 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3190 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3191 3192 /* Create local submatrices Asub and Bsub */ 3193 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3194 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3195 3196 /* Create submatrix M */ 3197 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3198 3199 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3200 asub = (Mat_MPIAIJ*)M->data; 3201 3202 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3203 n = asub->B->cmap->N; 3204 if (BsubN > n) { 3205 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3206 const PetscInt *idx; 3207 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3208 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3209 3210 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3211 j = 0; 3212 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3213 for (i=0; i<n; i++) { 3214 if (j >= BsubN) break; 3215 while (subgarray[i] > garray[j]) j++; 3216 3217 if (subgarray[i] == garray[j]) { 3218 idx_new[i] = idx[j++]; 3219 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3220 } 3221 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3222 3223 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3224 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3225 3226 } else if (BsubN < n) { 3227 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3228 } 3229 3230 ierr = PetscFree(garray);CHKERRQ(ierr); 3231 *submat = M; 3232 3233 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3234 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3235 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3236 3237 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3238 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3239 3240 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3241 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3242 } 3243 PetscFunctionReturn(0); 3244 } 3245 3246 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3247 { 3248 PetscErrorCode ierr; 3249 IS iscol_local=NULL,isrow_d; 3250 PetscInt csize; 3251 PetscInt n,i,j,start,end; 3252 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3253 MPI_Comm comm; 3254 3255 PetscFunctionBegin; 3256 /* If isrow has same processor distribution as mat, 3257 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3258 if (call == MAT_REUSE_MATRIX) { 3259 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3260 if (isrow_d) { 3261 sameRowDist = PETSC_TRUE; 3262 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3263 } else { 3264 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3265 if (iscol_local) { 3266 sameRowDist = PETSC_TRUE; 3267 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3268 } 3269 } 3270 } else { 3271 /* Check if isrow has same processor distribution as mat */ 3272 sameDist[0] = 
PETSC_FALSE; 3273 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3274 if (!n) { 3275 sameDist[0] = PETSC_TRUE; 3276 } else { 3277 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3278 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3279 if (i >= start && j < end) { 3280 sameDist[0] = PETSC_TRUE; 3281 } 3282 } 3283 3284 /* Check if iscol has same processor distribution as mat */ 3285 sameDist[1] = PETSC_FALSE; 3286 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3287 if (!n) { 3288 sameDist[1] = PETSC_TRUE; 3289 } else { 3290 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3291 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3292 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3293 } 3294 3295 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3296 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3297 sameRowDist = tsameDist[0]; 3298 } 3299 3300 if (sameRowDist) { 3301 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3302 /* isrow and iscol have same processor distribution as mat */ 3303 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } else { /* sameRowDist */ 3306 /* isrow has same processor distribution as mat */ 3307 if (call == MAT_INITIAL_MATRIX) { 3308 PetscBool sorted; 3309 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3310 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3311 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3312 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3313 3314 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3315 if (sorted) { 3316 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3317 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3318 PetscFunctionReturn(0); 3319 } 3320 } else { /* call == MAT_REUSE_MATRIX */ 3321 IS iscol_sub; 3322 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3323 if (iscol_sub) { 3324 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3325 PetscFunctionReturn(0); 3326 } 3327 } 3328 } 3329 } 3330 3331 /* General case: iscol -> iscol_local which has global size of iscol */ 3332 if (call == MAT_REUSE_MATRIX) { 3333 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3334 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3335 } else { 3336 if (!iscol_local) { 3337 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3338 } 3339 } 3340 3341 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3342 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3343 3344 if (call == MAT_INITIAL_MATRIX) { 3345 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3346 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3347 } 3348 PetscFunctionReturn(0); 3349 } 3350 3351 /*@C 3352 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3353 and "off-diagonal" part of the matrix in CSR format. 3354 3355 Collective 3356 3357 Input Parameters: 3358 + comm - MPI communicator 3359 . 
A - "diagonal" portion of matrix 3360 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3361 - garray - global index of B columns 3362 3363 Output Parameter: 3364 . mat - the matrix, with input A as its local diagonal matrix 3365 Level: advanced 3366 3367 Notes: 3368 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3369 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3370 3371 .seealso: MatCreateMPIAIJWithSplitArrays() 3372 @*/ 3373 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3374 { 3375 PetscErrorCode ierr; 3376 Mat_MPIAIJ *maij; 3377 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3378 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3379 PetscScalar *oa=b->a; 3380 Mat Bnew; 3381 PetscInt m,n,N; 3382 3383 PetscFunctionBegin; 3384 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3385 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3386 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3387 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3388 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3389 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3390 3391 /* Get global columns of mat */ 3392 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3393 3394 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3395 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3396 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3397 maij = (Mat_MPIAIJ*)(*mat)->data; 3398 3399 (*mat)->preallocated = PETSC_TRUE; 3400 3401 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3402 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3403 3404 /* Set A as diagonal portion of *mat */ 3405 maij->A = A; 3406 3407 nz = oi[m]; 3408 for (i=0; i<nz; i++) { 3409 col = oj[i]; 3410 oj[i] = garray[col]; 3411 } 3412 3413 /* Set Bnew as off-diagonal portion of *mat */ 3414 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3415 bnew = (Mat_SeqAIJ*)Bnew->data; 3416 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3417 maij->B = Bnew; 3418 3419 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3420 3421 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3422 b->free_a = PETSC_FALSE; 3423 b->free_ij = PETSC_FALSE; 3424 ierr = MatDestroy(&B);CHKERRQ(ierr); 3425 3426 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3427 bnew->free_a = PETSC_TRUE; 3428 bnew->free_ij = PETSC_TRUE; 3429 3430 /* condense columns of maij->B */ 3431 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3432 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3433 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3434 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3435 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3440 
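/*
   Example (a sketch only) of using MatCreateMPIAIJWithSeqAIJ() documented above. Aloc, Bloc and
   garray are illustrative names: they are assumed to have been built by the caller consistently
   with the layout described in the manual page. After the call, Aloc becomes part of C and Bloc
   is destroyed by the routine, so neither may be used afterwards.

     Mat      Aloc,Bloc,C;
     PetscInt *garray;
     ....build Aloc (diagonal block), Bloc (off-diagonal block) and garray (global column indices of Bloc's columns)....
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
*/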
3441 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3442 { 3443 PetscErrorCode ierr; 3444 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3445 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3446 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3447 Mat M,Msub,B=a->B; 3448 MatScalar *aa; 3449 Mat_SeqAIJ *aij; 3450 PetscInt *garray = a->garray,*colsub,Ncols; 3451 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3452 IS iscol_sub,iscmap; 3453 const PetscInt *is_idx,*cmap; 3454 PetscBool allcolumns=PETSC_FALSE; 3455 MPI_Comm comm; 3456 3457 PetscFunctionBegin; 3458 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3459 3460 if (call == MAT_REUSE_MATRIX) { 3461 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3462 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3463 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3464 3465 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3466 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3467 3468 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3469 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3470 3471 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3472 3473 } else { /* call == MAT_INITIAL_MATRIX) */ 3474 PetscBool flg; 3475 3476 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3477 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3478 3479 /* (1) iscol -> nonscalable iscol_local */ 3480 /* Check for special case: each processor gets entire matrix columns */ 3481 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3482 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3483 if (allcolumns) { 3484 iscol_sub = iscol_local; 3485 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3486 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3487 3488 } else { 3489 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3490 PetscInt *idx,*cmap1,k; 3491 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3492 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3493 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3494 count = 0; 3495 k = 0; 3496 for (i=0; i<Ncols; i++) { 3497 j = is_idx[i]; 3498 if (j >= cstart && j < cend) { 3499 /* diagonal part of mat */ 3500 idx[count] = j; 3501 cmap1[count++] = i; /* column index in submat */ 3502 } else if (Bn) { 3503 /* off-diagonal part of mat */ 3504 if (j == garray[k]) { 3505 idx[count] = j; 3506 cmap1[count++] = i; /* column index in submat */ 3507 } else if (j > garray[k]) { 3508 while (j > garray[k] && k < Bn-1) k++; 3509 if (j == garray[k]) { 3510 idx[count] = j; 3511 cmap1[count++] = i; /* column index in submat */ 3512 } 3513 } 3514 } 3515 } 3516 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3517 3518 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3519 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3520 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3521 3522 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3523 } 3524 3525 /* (3) Create sequential Msub */ 3526 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3527 } 3528 3529 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3530 aij = (Mat_SeqAIJ*)(Msub)->data; 3531 ii = aij->i; 3532 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3533 3534 /* 3535 m - number of local rows 3536 Ncols - number of columns (same on all processors) 3537 rstart - first row in new global matrix generated 3538 */ 3539 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3540 3541 if (call == MAT_INITIAL_MATRIX) { 3542 /* (4) Create parallel newmat */ 3543 PetscMPIInt rank,size; 3544 PetscInt csize; 3545 3546 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3547 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3548 3549 /* 3550 Determine the number of non-zeros in the diagonal and off-diagonal 3551 portions of the matrix in order to do correct preallocation 3552 */ 3553 3554 /* first get start and end of "diagonal" columns */ 3555 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3556 if (csize == PETSC_DECIDE) { 3557 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3558 if (mglobal == Ncols) { /* square matrix */ 3559 nlocal = m; 3560 } else { 3561 nlocal = Ncols/size + ((Ncols % size) > rank); 3562 } 3563 } else { 3564 nlocal = csize; 3565 } 3566 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3567 rstart = rend - nlocal; 3568 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3569 3570 /* next, compute all the lengths */ 3571 jj = aij->j; 3572 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3573 olens = dlens + m; 3574 for (i=0; i<m; i++) { 3575 jend = ii[i+1] - ii[i]; 3576 olen = 0; 3577 dlen = 0; 3578 for (j=0; j<jend; j++) { 3579 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3580 else dlen++; 3581 jj++; 3582 } 3583 olens[i] = olen; 3584 dlens[i] = dlen; 3585 } 3586 3587 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3588 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3589 3590 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3591 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3592 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3593 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3594 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3595 ierr = PetscFree(dlens);CHKERRQ(ierr); 3596 3597 } else { /* call == MAT_REUSE_MATRIX */ 3598 M = *newmat; 3599 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3600 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3601 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3602 /* 3603 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3604 rather than the slower MatSetValues(). 3605 */ 3606 M->was_assembled = PETSC_TRUE; 3607 M->assembled = PETSC_FALSE; 3608 } 3609 3610 /* (5) Set values of Msub to *newmat */ 3611 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3612 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3613 3614 jj = aij->j; 3615 aa = aij->a; 3616 for (i=0; i<m; i++) { 3617 row = rstart + i; 3618 nz = ii[i+1] - ii[i]; 3619 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3620 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3621 jj += nz; aa += nz; 3622 } 3623 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3624 3625 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3626 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3627 3628 ierr = PetscFree(colsub);CHKERRQ(ierr); 3629 3630 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3631 if (call == MAT_INITIAL_MATRIX) { 3632 *newmat = M; 3633 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3634 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3635 3636 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3637 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3638 3639 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3640 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3641 3642 if (iscol_local) { 3643 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3644 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3645 } 3646 } 3647 PetscFunctionReturn(0); 3648 } 3649 3650 /* 3651 Not great since it makes two copies of the submatrix, first an SeqAIJ 3652 in local and then by concatenating the local matrices the end result. 3653 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3654 3655 Note: This requires a sequential iscol with all indices. 
3656 */ 3657 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3658 { 3659 PetscErrorCode ierr; 3660 PetscMPIInt rank,size; 3661 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3662 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3663 Mat M,Mreuse; 3664 MatScalar *aa,*vwork; 3665 MPI_Comm comm; 3666 Mat_SeqAIJ *aij; 3667 PetscBool colflag,allcolumns=PETSC_FALSE; 3668 3669 PetscFunctionBegin; 3670 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3671 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3672 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3673 3674 /* Check for special case: each processor gets entire matrix columns */ 3675 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3676 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3677 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3678 3679 if (call == MAT_REUSE_MATRIX) { 3680 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3681 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3682 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3683 } else { 3684 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3685 } 3686 3687 /* 3688 m - number of local rows 3689 n - number of columns (same on all processors) 3690 rstart - first row in new global matrix generated 3691 */ 3692 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3693 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3694 if (call == MAT_INITIAL_MATRIX) { 3695 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3696 ii = aij->i; 3697 jj = aij->j; 3698 3699 /* 3700 Determine the number of non-zeros in the diagonal and off-diagonal 3701 portions of the matrix in order to do correct preallocation 3702 */ 3703 3704 /* first get start and end of "diagonal" columns */ 3705 if (csize == PETSC_DECIDE) { 3706 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3707 if (mglobal == n) { /* square matrix */ 3708 nlocal = m; 3709 } else { 3710 nlocal = n/size + ((n % size) > rank); 3711 } 3712 } else { 3713 nlocal = csize; 3714 } 3715 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3716 rstart = rend - nlocal; 3717 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3718 3719 /* next, compute all the lengths */ 3720 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3721 olens = dlens + m; 3722 for (i=0; i<m; i++) { 3723 jend = ii[i+1] - ii[i]; 3724 olen = 0; 3725 dlen = 0; 3726 for (j=0; j<jend; j++) { 3727 if (*jj < rstart || *jj >= rend) olen++; 3728 else dlen++; 3729 jj++; 3730 } 3731 olens[i] = olen; 3732 dlens[i] = dlen; 3733 } 3734 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3735 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3736 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3737 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3738 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3739 ierr = PetscFree(dlens);CHKERRQ(ierr); 3740 } else { 3741 PetscInt ml,nl; 3742 3743 M = *newmat; 3744 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3745 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3746 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3747 /* 3748 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3749 rather than the slower MatSetValues(). 3750 */ 3751 M->was_assembled = PETSC_TRUE; 3752 M->assembled = PETSC_FALSE; 3753 } 3754 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3755 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3756 ii = aij->i; 3757 jj = aij->j; 3758 aa = aij->a; 3759 for (i=0; i<m; i++) { 3760 row = rstart + i; 3761 nz = ii[i+1] - ii[i]; 3762 cwork = jj; jj += nz; 3763 vwork = aa; aa += nz; 3764 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3765 } 3766 3767 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3768 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3769 *newmat = M; 3770 3771 /* save submatrix used in processor for next request */ 3772 if (call == MAT_INITIAL_MATRIX) { 3773 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3774 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3775 } 3776 PetscFunctionReturn(0); 3777 } 3778 3779 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3780 { 3781 PetscInt m,cstart, cend,j,nnz,i,d; 3782 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3783 const PetscInt *JJ; 3784 PetscErrorCode ierr; 3785 PetscBool nooffprocentries; 3786 3787 PetscFunctionBegin; 3788 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3789 3790 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3791 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3792 m = B->rmap->n; 3793 cstart = B->cmap->rstart; 3794 cend = B->cmap->rend; 3795 rstart = B->rmap->rstart; 3796 3797 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3798 3799 #if defined(PETSC_USE_DEBUG) 3800 for (i=0; i<m; i++) { 3801 nnz = Ii[i+1]- Ii[i]; 3802 JJ = J + Ii[i]; 3803 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3804 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3805 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3806 } 3807 #endif 3808 3809 for (i=0; i<m; i++) { 3810 nnz = Ii[i+1]- Ii[i]; 3811 JJ = J + Ii[i]; 3812 nnz_max = PetscMax(nnz_max,nnz); 3813 d = 0; 3814 for (j=0; j<nnz; j++) { 3815 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3816 } 3817 d_nnz[i] = d; 3818 o_nnz[i] = nnz - d; 3819 } 3820 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3821 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3822 3823 for (i=0; i<m; i++) { 3824 ii = i + rstart; 3825 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3826 } 3827 nooffprocentries = B->nooffprocentries; 3828 B->nooffprocentries = PETSC_TRUE; 3829 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 B->nooffprocentries = nooffprocentries; 3832 3833 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3834 PetscFunctionReturn(0); 3835 } 3836 3837 /*@ 3838 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3839 (the default parallel PETSc format). 
3840 3841 Collective 3842 3843 Input Parameters: 3844 + B - the matrix 3845 . i - the indices into j for the start of each local row (starts with zero) 3846 . j - the column indices for each local row (starts with zero) 3847 - v - optional values in the matrix 3848 3849 Level: developer 3850 3851 Notes: 3852 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3853 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3854 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3855 3856 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3857 3858 The format which is used for the sparse matrix input, is equivalent to a 3859 row-major ordering.. i.e for the following matrix, the input data expected is 3860 as shown 3861 3862 $ 1 0 0 3863 $ 2 0 3 P0 3864 $ ------- 3865 $ 4 5 6 P1 3866 $ 3867 $ Process0 [P0]: rows_owned=[0,1] 3868 $ i = {0,1,3} [size = nrow+1 = 2+1] 3869 $ j = {0,0,2} [size = 3] 3870 $ v = {1,2,3} [size = 3] 3871 $ 3872 $ Process1 [P1]: rows_owned=[2] 3873 $ i = {0,3} [size = nrow+1 = 1+1] 3874 $ j = {0,1,2} [size = 3] 3875 $ v = {4,5,6} [size = 3] 3876 3877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3878 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3879 @*/ 3880 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3881 { 3882 PetscErrorCode ierr; 3883 3884 PetscFunctionBegin; 3885 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3886 PetscFunctionReturn(0); 3887 } 3888 3889 /*@C 3890 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3891 (the default parallel PETSc format). For good matrix assembly performance 3892 the user should preallocate the matrix storage by setting the parameters 3893 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3894 performance can be increased by more than a factor of 50. 3895 3896 Collective 3897 3898 Input Parameters: 3899 + B - the matrix 3900 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3901 (same value is used for all local rows) 3902 . d_nnz - array containing the number of nonzeros in the various rows of the 3903 DIAGONAL portion of the local submatrix (possibly different for each row) 3904 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3905 The size of this array is equal to the number of local rows, i.e 'm'. 3906 For matrices that will be factored, you must leave room for (and set) 3907 the diagonal entry even if it is zero. 3908 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3909 submatrix (same value is used for all local rows). 3910 - o_nnz - array containing the number of nonzeros in the various rows of the 3911 OFF-DIAGONAL portion of the local submatrix (possibly different for 3912 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3913 structure. The size of this array is equal to the number 3914 of local rows, i.e 'm'. 
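   A typical call sequence is sketched below (illustrative only; here each local row of a square
   matrix with m local rows is preallocated with at most 5 diagonal and 2 off-diagonal nonzeros):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve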
   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering; i.e., for the following matrix, the input data expected is
   as shown below.

   Once you have created the matrix, you can update it with new numerical values using
   MatUpdateMPIAIJWithArrays().

$        1 0 0
$        2 0 3         P0
$       -------
$        4 5 6         P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the other arrays must be identical
   to those passed when the matrix was created.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4114 - v - matrix values 4115 4116 Level: intermediate 4117 4118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4119 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4120 @*/ 4121 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4122 { 4123 PetscErrorCode ierr; 4124 PetscInt cstart,nnz,i,j; 4125 PetscInt *ld; 4126 PetscBool nooffprocentries; 4127 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4128 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4129 PetscScalar *ad = Ad->a, *ao = Ao->a; 4130 const PetscInt *Adi = Ad->i; 4131 PetscInt ldi,Iii,md; 4132 4133 PetscFunctionBegin; 4134 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4135 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4136 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4137 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4138 4139 cstart = mat->cmap->rstart; 4140 if (!Aij->ld) { 4141 /* count number of entries below block diagonal */ 4142 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4143 Aij->ld = ld; 4144 for (i=0; i<m; i++) { 4145 nnz = Ii[i+1]- Ii[i]; 4146 j = 0; 4147 while (J[j] < cstart && j < nnz) {j++;} 4148 J += nnz; 4149 ld[i] = j; 4150 } 4151 } else { 4152 ld = Aij->ld; 4153 } 4154 4155 for (i=0; i<m; i++) { 4156 nnz = Ii[i+1]- Ii[i]; 4157 Iii = Ii[i]; 4158 ldi = ld[i]; 4159 md = Adi[i+1]-Adi[i]; 4160 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4161 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4162 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4163 ad += md; 4164 ao += nnz - md; 4165 } 4166 nooffprocentries = mat->nooffprocentries; 4167 mat->nooffprocentries = PETSC_TRUE; 4168 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4169 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4170 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4171 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4172 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4173 mat->nooffprocentries = nooffprocentries; 4174 PetscFunctionReturn(0); 4175 } 4176 4177 /*@C 4178 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4179 (the default parallel PETSc format). For good matrix assembly performance 4180 the user should preallocate the matrix storage by setting the parameters 4181 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4182 performance can be increased by more than a factor of 50. 4183 4184 Collective 4185 4186 Input Parameters: 4187 + comm - MPI communicator 4188 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4189 This value should be the same as the local size used in creating the 4190 y vector for the matrix-vector product y = Ax. 4191 . n - This value should be the same as the local size used in creating the 4192 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4193 calculated if N is given) For square matrices n is almost always m. 4194 . 
M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm'; i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the m rows and n columns owned by
   that processor; i.e., the diagonal submatrix on process 0 is [m0 x n0],
   the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
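   As an illustrative sketch only (assuming the matrix lives on PETSC_COMM_WORLD), process 0 in
   the example above could create the matrix with its per-row counts from the d_nnz, o_nnz table:
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve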

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not Collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
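
   A minimal usage sketch (variable names are illustrative, error checking omitted):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* column j of Ao corresponds to column colmap[j] of the global matrix A */
.ve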
4388 4389 Level: intermediate 4390 4391 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAJ, MATSEQAIJ 4392 @*/ 4393 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4394 { 4395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4396 PetscBool flg; 4397 PetscErrorCode ierr; 4398 4399 PetscFunctionBegin; 4400 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4401 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4402 if (Ad) *Ad = a->A; 4403 if (Ao) *Ao = a->B; 4404 if (colmap) *colmap = a->garray; 4405 PetscFunctionReturn(0); 4406 } 4407 4408 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4409 { 4410 PetscErrorCode ierr; 4411 PetscInt m,N,i,rstart,nnz,Ii; 4412 PetscInt *indx; 4413 PetscScalar *values; 4414 4415 PetscFunctionBegin; 4416 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4417 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4418 PetscInt *dnz,*onz,sum,bs,cbs; 4419 4420 if (n == PETSC_DECIDE) { 4421 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4422 } 4423 /* Check sum(n) = N */ 4424 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4425 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4426 4427 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4428 rstart -= m; 4429 4430 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4431 for (i=0; i<m; i++) { 4432 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4433 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4434 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4435 } 4436 4437 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4438 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4439 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4440 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4441 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4442 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4443 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4444 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4445 } 4446 4447 /* numeric phase */ 4448 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4449 for (i=0; i<m; i++) { 4450 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4451 Ii = i + rstart; 4452 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4453 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4454 } 4455 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4456 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4457 PetscFunctionReturn(0); 4458 } 4459 4460 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4461 { 4462 PetscErrorCode ierr; 4463 PetscMPIInt rank; 4464 PetscInt m,N,i,rstart,nnz; 4465 size_t len; 4466 const PetscInt *indx; 4467 PetscViewer out; 4468 char *name; 4469 Mat B; 4470 const PetscScalar *values; 4471 4472 PetscFunctionBegin; 4473 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4474 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4475 /* Should this be the type of the diagonal block of A? 
*/ 4476 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4477 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4478 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4479 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4480 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4481 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4482 for (i=0; i<m; i++) { 4483 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4484 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4485 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4486 } 4487 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4488 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4489 4490 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4491 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4492 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4493 sprintf(name,"%s.%d",outfile,rank); 4494 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4495 ierr = PetscFree(name);CHKERRQ(ierr); 4496 ierr = MatView(B,out);CHKERRQ(ierr); 4497 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4498 ierr = MatDestroy(&B);CHKERRQ(ierr); 4499 PetscFunctionReturn(0); 4500 } 4501 4502 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4503 { 4504 PetscErrorCode ierr; 4505 Mat_Merge_SeqsToMPI *merge; 4506 PetscContainer container; 4507 4508 PetscFunctionBegin; 4509 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4510 if (container) { 4511 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4512 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4513 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4514 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4515 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4516 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4517 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4518 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4519 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4520 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4521 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4522 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4524 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4525 ierr = PetscFree(merge);CHKERRQ(ierr); 4526 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4527 } 4528 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 #include <../src/mat/utils/freespace.h> 4533 #include <petscbt.h> 4534 4535 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4536 { 4537 PetscErrorCode ierr; 4538 MPI_Comm comm; 4539 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4540 PetscMPIInt size,rank,taga,*len_s; 4541 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4542 PetscInt proc,m; 4543 PetscInt **buf_ri,**buf_rj; 4544 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4545 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4546 MPI_Request *s_waits,*r_waits; 4547 MPI_Status *status; 4548 MatScalar *aa=a->a; 4549 MatScalar **abuf_r,*ba_i; 4550 Mat_Merge_SeqsToMPI *merge; 4551 PetscContainer container; 4552 4553 PetscFunctionBegin; 4554 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4555 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4556 4557 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4558 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4559 4560 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4561 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4562 4563 bi = merge->bi; 4564 bj = merge->bj; 4565 buf_ri = merge->buf_ri; 4566 buf_rj = merge->buf_rj; 4567 4568 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4569 owners = merge->rowmap->range; 4570 len_s = merge->len_s; 4571 4572 /* send and recv matrix values */ 4573 /*-----------------------------*/ 4574 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4575 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4576 4577 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4578 for (proc=0,k=0; proc<size; proc++) { 4579 if (!len_s[proc]) continue; 4580 i = owners[proc]; 4581 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4582 k++; 4583 } 4584 4585 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4586 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4587 ierr = PetscFree(status);CHKERRQ(ierr); 4588 4589 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4590 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4591 4592 /* insert mat values of mpimat */ 4593 /*----------------------------*/ 4594 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4595 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4596 4597 for (k=0; k<merge->nrecv; k++) { 4598 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4599 nrows = *(buf_ri_k[k]); 4600 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4601 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4602 } 4603 4604 /* set values of ba */ 4605 m = merge->rowmap->n; 4606 for (i=0; i<m; i++) { 4607 arow = owners[rank] + i; 4608 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4609 bnzi = bi[i+1] - bi[i]; 4610 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4611 4612 /* add local non-zero vals of this proc's seqmat into ba */ 4613 anzi = ai[arow+1] - ai[arow]; 4614 aj = a->j + ai[arow]; 4615 aa = a->a + ai[arow]; 4616 nextaj = 0; 4617 for (j=0; nextaj<anzi; j++) { 4618 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4619 ba_i[j] += aa[nextaj++]; 4620 } 4621 } 4622 4623 /* add received vals into ba */ 4624 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4625 /* i-th row */ 4626 if (i == *nextrow[k]) { 4627 anzi = *(nextai[k]+1) - *nextai[k]; 4628 aj = buf_rj[k] + *(nextai[k]); 4629 aa = abuf_r[k] + *(nextai[k]); 4630 nextaj = 0; 4631 for (j=0; nextaj<anzi; j++) { 4632 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4633 ba_i[j] += aa[nextaj++]; 4634 } 4635 } 4636 nextrow[k]++; nextai[k]++; 4637 } 4638 } 4639 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4640 } 4641 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4642 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4643 4644 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4645 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4646 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4647 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4648 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4649 PetscFunctionReturn(0); 4650 } 4651 4652 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4653 { 4654 PetscErrorCode ierr; 4655 Mat B_mpi; 4656 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4657 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4658 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4659 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4660 PetscInt len,proc,*dnz,*onz,bs,cbs; 4661 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4662 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4663 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4664 MPI_Status *status; 4665 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4666 PetscBT lnkbt; 4667 Mat_Merge_SeqsToMPI *merge; 4668 PetscContainer container; 4669 4670 PetscFunctionBegin; 4671 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4672 4673 /* make sure it is a PETSc comm */ 4674 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4677 4678 ierr = PetscNew(&merge);CHKERRQ(ierr); 4679 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4680 4681 /* determine row ownership */ 4682 /*---------------------------------------------------------*/ 4683 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4684 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4685 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4686 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4687 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4688 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4689 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4690 4691 m = merge->rowmap->n; 4692 owners = merge->rowmap->range; 4693 4694 /* determine the number of messages to send, their lengths */ 4695 /*---------------------------------------------------------*/ 4696 len_s = merge->len_s; 4697 4698 len = 0; /* length of buf_si[] */ 4699 merge->nsend = 0; 4700 for (proc=0; proc<size; proc++) { 4701 len_si[proc] = 0; 4702 if (proc == rank) { 4703 len_s[proc] = 0; 4704 } else { 4705 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4706 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4707 } 4708 if (len_s[proc]) { 4709 merge->nsend++; 4710 nrows = 0; 4711 for (i=owners[proc]; i<owners[proc+1]; i++) { 4712 if (ai[i+1] > ai[i]) nrows++; 4713 } 4714 len_si[proc] = 2*(nrows+1); 4715 len += len_si[proc]; 4716 } 4717 } 4718 4719 /* determine the number and length of messages to receive for ij-structure */ 4720 /*-------------------------------------------------------------------------*/ 4721 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4722 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4723 4724 /* post the Irecv of j-structure */ 4725 /*-------------------------------*/ 4726 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4727 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4728 4729 /* post the Isend of j-structure */ 4730 /*--------------------------------*/ 4731 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4732 4733 for (proc=0, k=0; proc<size; proc++) { 4734 if (!len_s[proc]) continue; 4735 i = owners[proc]; 4736 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4737 k++; 4738 } 4739 4740 /* receives and sends 
of j-structure are complete */ 4741 /*------------------------------------------------*/ 4742 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4743 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4744 4745 /* send and recv i-structure */ 4746 /*---------------------------*/ 4747 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4748 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4749 4750 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4751 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4752 for (proc=0,k=0; proc<size; proc++) { 4753 if (!len_s[proc]) continue; 4754 /* form outgoing message for i-structure: 4755 buf_si[0]: nrows to be sent 4756 [1:nrows]: row index (global) 4757 [nrows+1:2*nrows+1]: i-structure index 4758 */ 4759 /*-------------------------------------------*/ 4760 nrows = len_si[proc]/2 - 1; 4761 buf_si_i = buf_si + nrows+1; 4762 buf_si[0] = nrows; 4763 buf_si_i[0] = 0; 4764 nrows = 0; 4765 for (i=owners[proc]; i<owners[proc+1]; i++) { 4766 anzi = ai[i+1] - ai[i]; 4767 if (anzi) { 4768 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4769 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4770 nrows++; 4771 } 4772 } 4773 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4774 k++; 4775 buf_si += len_si[proc]; 4776 } 4777 4778 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4779 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4780 4781 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4782 for (i=0; i<merge->nrecv; i++) { 4783 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4784 } 4785 4786 ierr = PetscFree(len_si);CHKERRQ(ierr); 4787 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4788 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4789 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4790 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4791 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4792 ierr = PetscFree(status);CHKERRQ(ierr); 4793 4794 /* compute a local seq matrix in each processor */ 4795 /*----------------------------------------------*/ 4796 /* allocate bi array and free space for accumulating nonzero column info */ 4797 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4798 bi[0] = 0; 4799 4800 /* create and initialize a linked list */ 4801 nlnk = N+1; 4802 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4803 4804 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4805 len = ai[owners[rank+1]] - ai[owners[rank]]; 4806 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4807 4808 current_space = free_space; 4809 4810 /* determine symbolic info for each local row */ 4811 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4812 4813 for (k=0; k<merge->nrecv; k++) { 4814 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4815 nrows = *buf_ri_k[k]; 4816 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4817 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4818 } 4819 4820 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4821 len = 0; 4822 for (i=0; i<m; i++) { 4823 bnzi = 0; 4824 /* add local 
non-zero cols of this proc's seqmat into lnk */ 4825 arow = owners[rank] + i; 4826 anzi = ai[arow+1] - ai[arow]; 4827 aj = a->j + ai[arow]; 4828 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4829 bnzi += nlnk; 4830 /* add received col data into lnk */ 4831 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4832 if (i == *nextrow[k]) { /* i-th row */ 4833 anzi = *(nextai[k]+1) - *nextai[k]; 4834 aj = buf_rj[k] + *nextai[k]; 4835 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4836 bnzi += nlnk; 4837 nextrow[k]++; nextai[k]++; 4838 } 4839 } 4840 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4841 4842 /* if free space is not available, make more free space */ 4843 if (current_space->local_remaining<bnzi) { 4844 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4845 nspacedouble++; 4846 } 4847 /* copy data into free space, then initialize lnk */ 4848 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4849 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4850 4851 current_space->array += bnzi; 4852 current_space->local_used += bnzi; 4853 current_space->local_remaining -= bnzi; 4854 4855 bi[i+1] = bi[i] + bnzi; 4856 } 4857 4858 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4859 4860 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4861 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4862 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4863 4864 /* create symbolic parallel matrix B_mpi */ 4865 /*---------------------------------------*/ 4866 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4867 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4868 if (n==PETSC_DECIDE) { 4869 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4870 } else { 4871 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4872 } 4873 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4874 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4875 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4876 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4877 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4878 4879 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4880 B_mpi->assembled = PETSC_FALSE; 4881 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4882 merge->bi = bi; 4883 merge->bj = bj; 4884 merge->buf_ri = buf_ri; 4885 merge->buf_rj = buf_rj; 4886 merge->coi = NULL; 4887 merge->coj = NULL; 4888 merge->owners_co = NULL; 4889 4890 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4891 4892 /* attach the supporting struct to B_mpi for reuse */ 4893 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4894 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4895 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4896 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4897 *mpimat = B_mpi; 4898 4899 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 /*@C 4904 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4905 matrices from each processor 4906 4907 Collective 4908 4909 Input Parameters: 4910 + comm - the communicators the parallel matrix will live on 4911 . seqmat - the input sequential matrices 4912 . 
m - number of local rows (or PETSC_DECIDE) 4913 . n - number of local columns (or PETSC_DECIDE) 4914 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4915 4916 Output Parameter: 4917 . mpimat - the parallel matrix generated 4918 4919 Level: advanced 4920 4921 Notes: 4922 The dimensions of the sequential matrix in each processor MUST be the same. 4923 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4924 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4925 @*/ 4926 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4927 { 4928 PetscErrorCode ierr; 4929 PetscMPIInt size; 4930 4931 PetscFunctionBegin; 4932 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4933 if (size == 1) { 4934 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4935 if (scall == MAT_INITIAL_MATRIX) { 4936 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4937 } else { 4938 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4939 } 4940 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4941 PetscFunctionReturn(0); 4942 } 4943 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4944 if (scall == MAT_INITIAL_MATRIX) { 4945 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4946 } 4947 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4948 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4949 PetscFunctionReturn(0); 4950 } 4951 4952 /*@ 4953 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4954 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4955 with MatGetSize() 4956 4957 Not Collective 4958 4959 Input Parameters: 4960 + A - the matrix 4961 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4962 4963 Output Parameter: 4964 . A_loc - the local sequential matrix generated 4965 4966 Level: developer 4967 4968 Notes: 4969 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4970 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4971 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4972 modify the values of the returned A_loc. 
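
   A rough usage sketch (error checking omitted):
.vb
     Mat A_loc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use or modify A_loc ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);  /* refresh A_loc after the values of A change */
     MatDestroy(&A_loc);
.ve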
4973 4974 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4975 4976 @*/ 4977 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4978 { 4979 PetscErrorCode ierr; 4980 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4981 Mat_SeqAIJ *mat,*a,*b; 4982 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4983 MatScalar *aa,*ba,*cam; 4984 PetscScalar *ca; 4985 PetscMPIInt size; 4986 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4987 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4988 PetscBool match; 4989 4990 PetscFunctionBegin; 4991 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4992 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4993 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4994 if (size == 1) { 4995 if (scall == MAT_INITIAL_MATRIX) { 4996 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4997 *A_loc = mpimat->A; 4998 } else if (scall == MAT_REUSE_MATRIX) { 4999 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5000 } 5001 PetscFunctionReturn(0); 5002 } 5003 5004 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5005 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5006 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5007 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5008 aa = a->a; ba = b->a; 5009 if (scall == MAT_INITIAL_MATRIX) { 5010 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5011 ci[0] = 0; 5012 for (i=0; i<am; i++) { 5013 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5014 } 5015 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5016 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5017 k = 0; 5018 for (i=0; i<am; i++) { 5019 ncols_o = bi[i+1] - bi[i]; 5020 ncols_d = ai[i+1] - ai[i]; 5021 /* off-diagonal portion of A */ 5022 for (jo=0; jo<ncols_o; jo++) { 5023 col = cmap[*bj]; 5024 if (col >= cstart) break; 5025 cj[k] = col; bj++; 5026 ca[k++] = *ba++; 5027 } 5028 /* diagonal portion of A */ 5029 for (j=0; j<ncols_d; j++) { 5030 cj[k] = cstart + *aj++; 5031 ca[k++] = *aa++; 5032 } 5033 /* off-diagonal portion of A */ 5034 for (j=jo; j<ncols_o; j++) { 5035 cj[k] = cmap[*bj++]; 5036 ca[k++] = *ba++; 5037 } 5038 } 5039 /* put together the new matrix */ 5040 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5041 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5042 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5043 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5044 mat->free_a = PETSC_TRUE; 5045 mat->free_ij = PETSC_TRUE; 5046 mat->nonew = 0; 5047 } else if (scall == MAT_REUSE_MATRIX) { 5048 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5049 ci = mat->i; cj = mat->j; cam = mat->a; 5050 for (i=0; i<am; i++) { 5051 /* off-diagonal portion of A */ 5052 ncols_o = bi[i+1] - bi[i]; 5053 for (jo=0; jo<ncols_o; jo++) { 5054 col = cmap[*bj]; 5055 if (col >= cstart) break; 5056 *cam++ = *ba++; bj++; 5057 } 5058 /* diagonal portion of A */ 5059 ncols_d = ai[i+1] - ai[i]; 5060 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5061 /* off-diagonal portion of A */ 5062 for (j=jo; j<ncols_o; j++) { 5063 *cam++ = *ba++; bj++; 5064 } 5065 } 5066 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5067 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5068 PetscFunctionReturn(0); 5069 } 5070 5071 /*@C 5072 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5073 5074 Not Collective 5075 5076 Input Parameters: 5077 + A - the matrix 5078 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5079 - row, col - index sets of rows and columns to extract (or NULL) 5080 5081 Output Parameter: 5082 . A_loc - the local sequential matrix generated 5083 5084 Level: developer 5085 5086 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5087 5088 @*/ 5089 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5090 { 5091 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5092 PetscErrorCode ierr; 5093 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5094 IS isrowa,iscola; 5095 Mat *aloc; 5096 PetscBool match; 5097 5098 PetscFunctionBegin; 5099 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5100 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5101 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5102 if (!row) { 5103 start = A->rmap->rstart; end = A->rmap->rend; 5104 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5105 } else { 5106 isrowa = *row; 5107 } 5108 if (!col) { 5109 start = A->cmap->rstart; 5110 cmap = a->garray; 5111 nzA = a->A->cmap->n; 5112 nzB = a->B->cmap->n; 5113 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5114 ncols = 0; 5115 for (i=0; i<nzB; i++) { 5116 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5117 else break; 5118 } 5119 imark = i; 5120 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5121 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5122 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5123 } else { 5124 iscola = *col; 5125 } 5126 if (scall != MAT_INITIAL_MATRIX) { 5127 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5128 aloc[0] = *A_loc; 5129 } 5130 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5131 if (!col) { /* attach global id of condensed columns */ 5132 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5133 } 5134 *A_loc = aloc[0]; 5135 ierr = PetscFree(aloc);CHKERRQ(ierr); 5136 if (!row) { 5137 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5138 } 5139 if (!col) { 5140 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5141 } 5142 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5143 PetscFunctionReturn(0); 5144 } 5145 5146 /* 5147 * Destroy a mat that may be 
composed with PetscSF communication objects. 5148 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5149 * */ 5150 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5151 { 5152 PetscSF sf,osf; 5153 IS map; 5154 PetscErrorCode ierr; 5155 5156 PetscFunctionBegin; 5157 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5158 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5159 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5160 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5161 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5162 ierr = ISDestroy(&map);CHKERRQ(ierr); 5163 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 /* 5168 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5169 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5170 * on a global size. 5171 * */ 5172 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5173 { 5174 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5175 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5176 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5177 PetscMPIInt owner; 5178 PetscSFNode *iremote,*oiremote; 5179 const PetscInt *lrowindices; 5180 PetscErrorCode ierr; 5181 PetscSF sf,osf; 5182 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5183 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5184 MPI_Comm comm; 5185 ISLocalToGlobalMapping mapping; 5186 5187 PetscFunctionBegin; 5188 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5189 /* plocalsize is the number of roots 5190 * nrows is the number of leaves 5191 * */ 5192 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5193 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5194 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5195 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5196 for (i=0;i<nrows;i++) { 5197 /* Find a remote index and an owner for a row 5198 * The row could be local or remote 5199 * */ 5200 owner = 0; 5201 lidx = 0; 5202 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5203 iremote[i].index = lidx; 5204 iremote[i].rank = owner; 5205 } 5206 /* Create SF to communicate how many nonzero columns for each row */ 5207 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5208 /* SF will figure out the number of nonzero colunms for each row, and their 5209 * offsets 5210 * */ 5211 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5212 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5213 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5214 5215 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5216 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5217 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5218 roffsets[0] = 0; 5219 roffsets[1] = 0; 5220 for (i=0;i<plocalsize;i++) { 5221 /* diag */ 5222 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5223 /* off diag */ 5224 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5225 /* compute offsets so that we relative location for each row */ 5226 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5227 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5228 } 5229 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5230 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5231 /* 'r' 
means root, and 'l' means leaf */ 5232 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5233 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5234 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5235 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5236 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5237 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5238 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5239 dntotalcols = 0; 5240 ontotalcols = 0; 5241 ncol = 0; 5242 for (i=0;i<nrows;i++) { 5243 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5244 ncol = PetscMax(pnnz[i],ncol); 5245 /* diag */ 5246 dntotalcols += nlcols[i*2+0]; 5247 /* off diag */ 5248 ontotalcols += nlcols[i*2+1]; 5249 } 5250 /* We do not need to figure the right number of columns 5251 * since all the calculations will be done by going through the raw data 5252 * */ 5253 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5254 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5255 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5256 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5257 /* diag */ 5258 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5259 /* off diag */ 5260 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5261 /* diag */ 5262 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5263 /* off diag */ 5264 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5265 dntotalcols = 0; 5266 ontotalcols = 0; 5267 ntotalcols = 0; 5268 for (i=0;i<nrows;i++) { 5269 owner = 0; 5270 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5271 /* Set iremote for diag matrix */ 5272 for (j=0;j<nlcols[i*2+0];j++) { 5273 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5274 iremote[dntotalcols].rank = owner; 5275 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5276 ilocal[dntotalcols++] = ntotalcols++; 5277 } 5278 /* off diag */ 5279 for (j=0;j<nlcols[i*2+1];j++) { 5280 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5281 oiremote[ontotalcols].rank = owner; 5282 oilocal[ontotalcols++] = ntotalcols++; 5283 } 5284 } 5285 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5286 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5287 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5288 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5289 /* P serves as roots and P_oth is leaves 5290 * Diag matrix 5291 * */ 5292 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5293 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5294 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5295 5296 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5297 /* Off diag */ 5298 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5299 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5300 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5301 /* We operate on the matrix internal data for saving memory */ 5302 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5303 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5304 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5305 /* Convert to global indices for diag matrix */ 5306 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5307 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5308 /* We want P_oth store global indices */ 5309 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5310 /* Use memory scalable approach */ 5311 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5312 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5313 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5314 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5315 /* Convert back to local indices */ 5316 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5317 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5318 nout = 0; 5319 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5320 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5321 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5322 /* Exchange values */ 5323 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5324 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5325 /* Stop PETSc from shrinking memory */ 5326 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5327 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5328 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5329 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5330 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5331 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5332 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5333 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5334 PetscFunctionReturn(0); 5335 } 5336 5337 /* 5338 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5339 * This supports MPIAIJ and MAIJ 5340 * */ 5341 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5342 { 5343 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5344 Mat_SeqAIJ *p_oth; 5345 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5346 IS rows,map; 5347 PetscHMapI hamp; 5348 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5349 MPI_Comm comm; 5350 PetscSF sf,osf; 5351 PetscBool has; 5352 PetscErrorCode ierr; 5353 5354 PetscFunctionBegin; 5355 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5356 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5357 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5358 * and then create a submatrix (that often is an overlapping matrix) 5359 * */ 5360 if (reuse==MAT_INITIAL_MATRIX) { 5361 /* Use a hash table to figure out unique keys */ 5362 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5363 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5364 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5365 count = 0; 5366 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5367 for (i=0;i<a->B->cmap->n;i++) { 5368 key = a->garray[i]/dof; 5369 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5370 if (!has) { 5371 mapping[i] = count; 5372 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5373 } else { 5374 /* Current 'i' has the same value the previous step */ 5375 mapping[i] = count-1; 5376 } 5377 } 5378 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5379 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5380 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5381 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5382 off = 0; 5383 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5384 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5385 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5386 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5387 /* In case, the matrix was already created but users want to recreate the matrix */ 5388 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5389 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5390 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5391 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5392 } else if (reuse==MAT_REUSE_MATRIX) { 5393 /* If matrix was already created, we simply update values using SF objects 5394 * that as attached to the matrix ealier. 5395 * */ 5396 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5397 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5398 if (!sf || !osf) { 5399 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5400 } 5401 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5402 /* Update values in place */ 5403 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5404 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5405 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5406 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5407 } else { 5408 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5409 } 5410 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5411 PetscFunctionReturn(0); 5412 } 5413 5414 /*@C 5415 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5416 5417 Collective on Mat 5418 5419 Input Parameters: 5420 + A,B - the matrices in mpiaij format 5421 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5422 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5423 5424 Output Parameter: 5425 + rowb, colb - index sets of rows and columns of B to extract 5426 - B_seq - the sequential matrix generated 5427 5428 Level: developer 5429 5430 @*/ 5431 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5432 { 5433 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5434 PetscErrorCode ierr; 5435 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5436 IS isrowb,iscolb; 5437 Mat *bseq=NULL; 5438 5439 PetscFunctionBegin; 5440 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5441 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5442 } 5443 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5444 5445 if (scall == MAT_INITIAL_MATRIX) { 5446 start = A->cmap->rstart; 5447 cmap = a->garray; 5448 nzA = a->A->cmap->n; 5449 nzB = a->B->cmap->n; 5450 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5451 ncols = 0; 5452 for (i=0; i<nzB; i++) { /* row < local row index */ 5453 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5454 else break; 5455 } 5456 imark = i; 5457 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5458 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5459 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5460 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5461 } else { 5462 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5463 isrowb = *rowb; iscolb = *colb; 5464 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5465 bseq[0] = *B_seq; 5466 } 5467 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5468 *B_seq = bseq[0]; 5469 ierr = PetscFree(bseq);CHKERRQ(ierr); 5470 if (!rowb) { 5471 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5472 } else { 5473 *rowb = isrowb; 5474 } 5475 if (!colb) { 5476 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5477 } else { 5478 *colb = iscolb; 5479 } 5480 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5481 PetscFunctionReturn(0); 5482 } 5483 5484 /* 5485 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5486 of the OFF-DIAGONAL portion of local A 5487 5488 Collective on Mat 5489 5490 Input Parameters: 5491 + A,B - the matrices in mpiaij format 5492 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5493 5494 Output Parameter: 5495 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5496 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5497 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5498 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5499 5500 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5501 for this matrix. This is not desirable.. 
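
    A rough calling sketch (local variable names are hypothetical, error checking omitted):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;

      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      ... later, when only the numerical values of B have changed ...
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);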
5502 5503 Level: developer 5504 5505 */ 5506 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5507 { 5508 PetscErrorCode ierr; 5509 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5510 Mat_SeqAIJ *b_oth; 5511 VecScatter ctx; 5512 MPI_Comm comm; 5513 const PetscMPIInt *rprocs,*sprocs; 5514 const PetscInt *srow,*rstarts,*sstarts; 5515 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5516 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5517 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5518 MPI_Request *rwaits = NULL,*swaits = NULL; 5519 MPI_Status rstatus; 5520 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5521 5522 PetscFunctionBegin; 5523 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5525 5526 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5527 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5528 } 5529 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5530 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5531 5532 if (size == 1) { 5533 startsj_s = NULL; 5534 bufa_ptr = NULL; 5535 *B_oth = NULL; 5536 PetscFunctionReturn(0); 5537 } 5538 5539 ctx = a->Mvctx; 5540 tag = ((PetscObject)ctx)->tag; 5541 5542 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5543 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5544 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5545 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5546 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5547 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5548 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5549 5550 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5551 if (scall == MAT_INITIAL_MATRIX) { 5552 /* i-array */ 5553 /*---------*/ 5554 /* post receives */ 5555 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5556 for (i=0; i<nrecvs; i++) { 5557 rowlen = rvalues + rstarts[i]*rbs; 5558 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5559 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5560 } 5561 5562 /* pack the outgoing message */ 5563 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5564 5565 sstartsj[0] = 0; 5566 rstartsj[0] = 0; 5567 len = 0; /* total length of j or a array to be sent */ 5568 if (nsends) { 5569 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5570 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5571 } 5572 for (i=0; i<nsends; i++) { 5573 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5574 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5575 for (j=0; j<nrows; j++) { 5576 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5577 for (l=0; l<sbs; l++) { 5578 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5579 5580 rowlen[j*sbs+l] = ncols; 5581 5582 len += ncols; 5583 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5584 } 5585 k++; 5586 } 5587 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5588 5589 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5590 } 5591 /* recvs and sends of i-array are completed */ 5592 i = nrecvs; 5593 while (i--) { 5594 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5595 } 5596 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5597 ierr = PetscFree(svalues);CHKERRQ(ierr); 5598 5599 /* allocate buffers for sending j and a arrays */ 5600 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5601 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5602 5603 /* create i-array of B_oth */ 5604 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5605 5606 b_othi[0] = 0; 5607 len = 0; /* total length of j or a array to be received */ 5608 k = 0; 5609 for (i=0; i<nrecvs; i++) { 5610 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5611 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5612 for (j=0; j<nrows; j++) { 5613 b_othi[k+1] = b_othi[k] + rowlen[j]; 5614 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5615 k++; 5616 } 5617 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5618 } 5619 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5620 5621 /* allocate space for j and a arrrays of B_oth */ 5622 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5623 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5624 5625 /* j-array */ 5626 /*---------*/ 5627 /* post receives of j-array */ 5628 for (i=0; i<nrecvs; i++) { 5629 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5630 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5631 } 5632 5633 /* pack the outgoing message j-array */ 5634 if (nsends) k = sstarts[0]; 5635 for (i=0; i<nsends; i++) { 5636 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5637 bufJ = bufj+sstartsj[i]; 5638 for (j=0; j<nrows; j++) { 5639 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5640 for (ll=0; ll<sbs; ll++) { 5641 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5642 for (l=0; l<ncols; l++) { 5643 *bufJ++ = cols[l]; 5644 } 5645 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5646 } 5647 } 5648 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5649 } 5650 5651 /* recvs and sends of j-array are completed */ 5652 i = nrecvs; 5653 while (i--) { 5654 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5655 } 5656 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5657 } else if (scall == MAT_REUSE_MATRIX) { 5658 sstartsj = *startsj_s; 5659 rstartsj = *startsj_r; 5660 bufa = *bufa_ptr; 5661 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5662 b_otha = b_oth->a; 5663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5664 5665 /* a-array */ 5666 /*---------*/ 5667 /* post receives of a-array */ 5668 for (i=0; i<nrecvs; i++) { 5669 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5670 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5671 } 5672 5673 /* pack the outgoing message a-array */ 5674 if (nsends) k = sstarts[0]; 5675 for (i=0; i<nsends; i++) { 5676 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5677 bufA = bufa+sstartsj[i]; 5678 for (j=0; j<nrows; j++) { 5679 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5680 for (ll=0; ll<sbs; ll++) { 5681 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5682 for (l=0; l<ncols; l++) { 5683 *bufA++ = vals[l]; 5684 } 5685 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5686 } 5687 } 5688 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5689 } 5690 /* recvs and sends of a-array are completed */ 5691 i = nrecvs; 5692 while (i--) { 5693 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5694 } 5695 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5696 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5697 5698 if (scall == MAT_INITIAL_MATRIX) { 5699 /* put together the new matrix */ 5700 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5701 5702 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5703 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5704 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5705 b_oth->free_a = PETSC_TRUE; 5706 b_oth->free_ij = PETSC_TRUE; 5707 b_oth->nonew = 0; 5708 5709 ierr = PetscFree(bufj);CHKERRQ(ierr); 5710 if (!startsj_s || !bufa_ptr) { 5711 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5712 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5713 } else { 5714 *startsj_s = sstartsj; 5715 *startsj_r = rstartsj; 5716 *bufa_ptr = bufa; 5717 } 5718 } 5719 5720 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5721 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5722 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5723 PetscFunctionReturn(0); 5724 } 5725 5726 /*@C 5727 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5728 5729 Not Collective 5730 5731 Input Parameters: 5732 . A - The matrix in mpiaij format 5733 5734 Output Parameter: 5735 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5736 . 

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}
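
/*
   Illustrative use of the MPIDense*MPIAIJ product routines above (a sketch, not compiled here):
   with A of type MATMPIDENSE and B of type MATMPIAIJ, the generic product interface is intended
   to reach these routines, e.g.

     Mat C;
     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
     MatDestroy(&C);
*/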

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
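
   A minimal creation sketch (the communicator, sizes, and preallocation counts d_nz/o_nz are placeholders):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
   /* ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve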

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.
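
       A minimal calling sketch (error checking omitted; i,j,a and oi,oj,oa are the caller-owned CSR arrays described
       above, and as created here the off-diagonal column indices oj are global):
.vb
   Mat A;
   MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);
   /* use A; the six arrays must remain valid until after MatDestroy(&A) */
   MatDestroy(&A);
.ve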

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macros below */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* refresh ba before recomputing ap2; MatDisAssemble_MPIAIJ() rebuilt B */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}