1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 52 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 53 const PetscInt *ia,*ib; 54 const MatScalar *aa,*bb; 55 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 56 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 57 58 PetscFunctionBegin; 59 *keptrows = 0; 60 ia = a->i; 61 ib = b->i; 62 for (i=0; i<m; i++) { 63 na = ia[i+1] - ia[i]; 64 nb = ib[i+1] - ib[i]; 65 if (!na && !nb) { 66 cnt++; 67 goto ok1; 68 } 69 aa = a->a + ia[i]; 70 for (j=0; j<na; j++) { 71 if (aa[j] != 0.0) goto ok1; 72 } 73 bb = b->a + ib[i]; 74 for (j=0; j <nb; j++) { 75 if (bb[j] != 0.0) goto ok1; 76 } 77 cnt++; 78 ok1:; 79 } 80 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 81 if (!n0rows) PetscFunctionReturn(0); 82 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 83 cnt = 0; 84 for (i=0; i<m; i++) { 85 na = ia[i+1] - ia[i]; 86 nb = ib[i+1] - ib[i]; 87 if (!na && !nb) continue; 88 aa = a->a + ia[i]; 89 for (j=0; j<na;j++) { 90 if (aa[j] != 0.0) { 91 rows[cnt++] = rstart + i; 92 goto ok2; 93 } 94 } 95 bb = b->a + ib[i]; 96 for (j=0; j<nb; j++) { 97 if (bb[j] != 0.0) { 98 rows[cnt++] = rstart + i; 99 goto ok2; 100 } 101 } 102 ok2:; 103 } 104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) { 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *owners = A->rmap->range; 787 PetscInt n = A->rmap->n; 788 PetscSF sf; 789 PetscInt *lrows; 790 PetscSFNode *rrows; 791 PetscInt r, p = 0, len = 0; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 /* Create SF where leaves are input rows and roots are owned rows */ 796 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 797 for (r = 0; r < n; ++r) lrows[r] = -1; 798 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 799 for (r = 0; r < N; ++r) { 800 const PetscInt idx = rows[r]; 801 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 802 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 803 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 804 } 805 if (A->nooffproczerorows) { 806 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 807 lrows[len++] = idx - owners[p]; 808 } else { 809 rrows[r].rank = p; 810 rrows[r].index = rows[r] - owners[p]; 811 } 812 } 813 if (!A->nooffproczerorows) { 814 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 815 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 816 /* Collect flags for rows to be zeroed */ 817 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 818 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 819 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 820 /* Compress and put in row numbers */ 821 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 822 } 823 /* fix right hand side if needed */ 824 if (x && b) { 825 const PetscScalar *xx; 826 PetscScalar *bb; 827 828 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 829 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 831 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 833 } 834 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 835 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 836 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 837 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 838 } else if (diag != 0.0) { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 841 for (r = 0; r < len; ++r) { 842 const PetscInt row = lrows[r] + A->rmap->rstart; 843 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 844 } 845 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 847 } else { 848 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 849 } 850 ierr = PetscFree(lrows);CHKERRQ(ierr); 851 852 /* only change matrix nonzero state if pattern was allowed to be changed */ 853 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 854 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 855 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 865 PetscErrorCode ierr; 866 PetscMPIInt n = A->rmap->n; 867 PetscInt i,j,r,m,p = 0,len = 0; 868 PetscInt *lrows,*owners = A->rmap->range; 869 PetscSFNode *rrows; 870 PetscSF sf; 871 const PetscScalar *xx; 872 PetscScalar *bb,*mask; 873 Vec xmask,lmask; 874 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 875 const PetscInt *aj, *ii,*ridx; 876 PetscScalar *aa; 877 878 PetscFunctionBegin; 879 /* Create SF where leaves are input rows and roots are owned rows */ 880 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 881 for (r = 0; r < n; ++r) lrows[r] = -1; 882 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 883 for (r = 0; r < N; ++r) { 884 const PetscInt idx = rows[r]; 885 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 886 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 887 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 888 } 889 rrows[r].rank = p; 890 rrows[r].index = rows[r] - owners[p]; 891 } 892 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 893 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 894 /* Collect flags for rows to be zeroed */ 895 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 896 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 897 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 898 /* Compress and put in row numbers */ 899 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 900 /* zero diagonal part of matrix */ 901 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 902 /* handle off diagonal part of matrix */ 903 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 904 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 905 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 906 for (i=0; i<len; i++) bb[lrows[i]] = 1; 907 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 908 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 911 if (x) { 912 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 916 } 917 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 918 /* remove zeroed rows of off diagonal matrix */ 919 ii = aij->i; 920 for (i=0; i<len; i++) { 921 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 922 } 923 /* loop over all elements of off process part of matrix zeroing removed columns*/ 924 if (aij->compressedrow.use) { 925 m = aij->compressedrow.nrows; 926 ii = aij->compressedrow.i; 927 ridx = aij->compressedrow.rindex; 928 for (i=0; i<m; i++) { 929 n = ii[i+1] - ii[i]; 930 aj = aij->j + ii[i]; 931 aa = aij->a + ii[i]; 932 933 for (j=0; j<n; j++) { 934 if (PetscAbsScalar(mask[*aj])) { 935 if (b) bb[*ridx] -= *aa*xx[*aj]; 936 *aa = 0.0; 937 } 938 aa++; 939 aj++; 940 } 941 ridx++; 942 } 943 } else { /* do not use compressed row format */ 944 m = l->B->rmap->n; 945 for (i=0; i<m; i++) { 946 n = ii[i+1] - ii[i]; 947 aj = aij->j + ii[i]; 948 aa = aij->a + ii[i]; 949 for (j=0; j<n; j++) { 950 if (PetscAbsScalar(mask[*aj])) { 951 if (b) bb[i] -= *aa*xx[*aj]; 952 *aa = 0.0; 953 } 954 aa++; 955 aj++; 956 } 957 } 958 } 959 if (x) { 960 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 961 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 962 } 963 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 964 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 965 ierr = PetscFree(lrows);CHKERRQ(ierr); 966 967 /* only change matrix nonzero state if pattern was allowed to be changed */ 968 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 969 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 970 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMult_MPIAIJ" 977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 PetscInt nt; 982 983 PetscFunctionBegin; 984 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 985 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 986 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1008 { 1009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1010 PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 PetscBool merged; 1027 1028 PetscFunctionBegin; 1029 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1030 /* do nondiagonal part */ 1031 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1032 if (!merged) { 1033 /* send it on its way */ 1034 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 /* do local part */ 1036 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1037 /* receive remote parts: note this assumes the values are not actually */ 1038 /* added in yy until the next line, */ 1039 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1040 } else { 1041 /* do local part */ 1042 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1043 /* send it on its way */ 1044 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 /* values actually were received in the Begin() but we need to call this nop */ 1046 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1053 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1054 { 1055 MPI_Comm comm; 1056 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1057 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1058 IS Me,Notme; 1059 PetscErrorCode ierr; 1060 PetscInt M,N,first,last,*notme,i; 1061 PetscMPIInt size; 1062 1063 PetscFunctionBegin; 1064 /* Easy test: symmetric diagonal block */ 1065 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1066 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1067 if (!*f) PetscFunctionReturn(0); 1068 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1070 if (size == 1) PetscFunctionReturn(0); 1071 1072 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1073 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1074 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1076 for (i=0; i<first; i++) notme[i] = i; 1077 for (i=last; i<M; i++) notme[i-last+first] = i; 1078 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1079 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1080 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1081 Aoff = Aoffs[0]; 1082 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1083 Boff = Boffs[0]; 1084 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1085 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1086 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1087 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1088 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1089 ierr = PetscFree(notme);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1096 { 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 /* do nondiagonal part */ 1102 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1103 /* send it on its way */ 1104 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1105 /* do local part */ 1106 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1107 /* receive remote parts */ 1108 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 This only works correctly for square matrices where the subblock A->A is the 1114 diagonal block 1115 */ 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1119 { 1120 PetscErrorCode ierr; 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 1123 PetscFunctionBegin; 1124 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1125 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1126 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatScale_MPIAIJ" 1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1139 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatDestroy_MPIAIJ" 1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 #if defined(PETSC_USE_LOG) 1152 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1153 #endif 1154 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1156 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1157 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_CTABLE) 1159 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1160 #else 1161 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1166 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1167 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1168 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1169 1170 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1192 PetscErrorCode ierr; 1193 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1194 int fd; 1195 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1196 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1197 PetscScalar *column_values; 1198 PetscInt message_count,flowcontrolcount; 1199 FILE *file; 1200 1201 PetscFunctionBegin; 1202 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1204 nz = A->nz + B->nz; 1205 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1206 if (!rank) { 1207 header[0] = MAT_FILE_CLASSID; 1208 header[1] = mat->rmap->N; 1209 header[2] = mat->cmap->N; 1210 1211 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 /* get largest number of rows any processor has */ 1214 rlen = mat->rmap->n; 1215 range = mat->rmap->range; 1216 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1217 } else { 1218 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 rlen = mat->rmap->n; 1220 } 1221 1222 /* load up the local row counts */ 1223 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1224 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1225 1226 /* store the row lengths to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1230 for (i=1; i<size; i++) { 1231 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1232 rlen = range[i+1] - range[i]; 1233 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1235 } 1236 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1237 } else { 1238 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1239 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1241 } 1242 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1243 1244 /* load up the local column indices */ 1245 nzmax = nz; /* th processor needs space a largest processor needs */ 1246 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1248 cnt = 0; 1249 for (i=0; i<mat->rmap->n; i++) { 1250 for (j=B->i[i]; j<B->i[i+1]; j++) { 1251 if ((col = garray[B->j[j]]) > cstart) break; 1252 column_indices[cnt++] = col; 1253 } 1254 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1255 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1256 } 1257 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1258 1259 /* store the column indices to the file */ 1260 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1261 if (!rank) { 1262 MPI_Status status; 1263 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 for (i=1; i<size; i++) { 1265 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1266 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1267 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1268 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1270 } 1271 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1274 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1277 } 1278 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1279 1280 /* load up the local column values */ 1281 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1282 cnt = 0; 1283 for (i=0; i<mat->rmap->n; i++) { 1284 for (j=B->i[i]; j<B->i[i+1]; j++) { 1285 if (garray[B->j[j]] > cstart) break; 1286 column_values[cnt++] = B->a[j]; 1287 } 1288 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1289 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1290 } 1291 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1292 1293 /* store the column values to the file */ 1294 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1295 if (!rank) { 1296 MPI_Status status; 1297 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1298 for (i=1; i<size; i++) { 1299 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1300 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1301 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1302 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1304 } 1305 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1308 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(column_values);CHKERRQ(ierr); 1313 1314 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1315 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #include <petscdraw.h> 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1323 { 1324 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1325 PetscErrorCode ierr; 1326 PetscMPIInt rank = aij->rank,size = aij->size; 1327 PetscBool isdraw,iascii,isbinary; 1328 PetscViewer sviewer; 1329 PetscViewerFormat format; 1330 1331 PetscFunctionBegin; 1332 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1333 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1334 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1335 if (iascii) { 1336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1337 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1338 MatInfo info; 1339 PetscBool inodes; 1340 1341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1342 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1343 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1344 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1345 if (!inodes) { 1346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1347 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1348 } else { 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1350 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1351 } 1352 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1354 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1356 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1359 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1362 PetscInt inodecount,inodelimit,*inodes; 1363 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1364 if (inodes) { 1365 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1366 } else { 1367 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1368 } 1369 PetscFunctionReturn(0); 1370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1371 PetscFunctionReturn(0); 1372 } 1373 } else if (isbinary) { 1374 if (size == 1) { 1375 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1376 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } else if (isdraw) { 1382 PetscDraw draw; 1383 PetscBool isnull; 1384 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1385 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1386 } 1387 1388 { 1389 /* assemble the entire matrix onto first processor. */ 1390 Mat A; 1391 Mat_SeqAIJ *Aloc; 1392 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1393 MatScalar *a; 1394 1395 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1396 if (!rank) { 1397 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1398 } else { 1399 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1400 } 1401 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1402 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1403 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1404 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1405 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1406 1407 /* copy over the A part */ 1408 Aloc = (Mat_SeqAIJ*)aij->A->data; 1409 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1410 row = mat->rmap->rstart; 1411 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1412 for (i=0; i<m; i++) { 1413 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1414 row++; 1415 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1416 } 1417 aj = Aloc->j; 1418 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1419 1420 /* copy over the B part */ 1421 Aloc = (Mat_SeqAIJ*)aij->B->data; 1422 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1423 row = mat->rmap->rstart; 1424 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1425 ct = cols; 1426 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1427 for (i=0; i<m; i++) { 1428 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1429 row++; 1430 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1431 } 1432 ierr = PetscFree(ct);CHKERRQ(ierr); 1433 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1434 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 /* 1436 Everyone has to call to draw the matrix since the graphics waits are 1437 synchronized across all processors that share the PetscDraw object 1438 */ 1439 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1440 if (!rank) { 1441 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1442 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1443 } 1444 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1445 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1446 ierr = MatDestroy(&A);CHKERRQ(ierr); 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatView_MPIAIJ" 1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1454 { 1455 PetscErrorCode ierr; 1456 PetscBool iascii,isdraw,issocket,isbinary; 1457 1458 PetscFunctionBegin; 1459 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1463 if (iascii || isdraw || isbinary || issocket) { 1464 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatSOR_MPIAIJ" 1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscErrorCode ierr; 1475 Vec bb1 = 0; 1476 PetscBool hasop; 1477 1478 PetscFunctionBegin; 1479 if (flag == SOR_APPLY_UPPER) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1485 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1486 } 1487 1488 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1489 if (flag & SOR_ZERO_INITIAL_GUESS) { 1490 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1491 its--; 1492 } 1493 1494 while (its--) { 1495 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 1498 /* update rhs: bb1 = bb - B*x */ 1499 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1500 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1501 1502 /* local sweep */ 1503 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1504 } 1505 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1506 if (flag & SOR_ZERO_INITIAL_GUESS) { 1507 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1508 its--; 1509 } 1510 while (its--) { 1511 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 1514 /* update rhs: bb1 = bb - B*x */ 1515 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1516 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1517 1518 /* local sweep */ 1519 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1520 } 1521 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1522 if (flag & SOR_ZERO_INITIAL_GUESS) { 1523 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1524 its--; 1525 } 1526 while (its--) { 1527 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 1530 /* update rhs: bb1 = bb - B*x */ 1531 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1532 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1533 1534 /* local sweep */ 1535 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1536 } 1537 } else if (flag & SOR_EISENSTAT) { 1538 Vec xx1; 1539 1540 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1542 1543 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 if (!mat->diag) { 1546 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1547 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1548 } 1549 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1550 if (hasop) { 1551 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1552 } else { 1553 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1554 } 1555 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1556 1557 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1558 1559 /* local sweep */ 1560 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1561 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1562 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1563 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1564 1565 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1566 1567 matin->errortype = mat->A->errortype; 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatPermute_MPIAIJ" 1573 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1574 { 1575 Mat aA,aB,Aperm; 1576 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1577 PetscScalar *aa,*ba; 1578 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1579 PetscSF rowsf,sf; 1580 IS parcolp = NULL; 1581 PetscBool done; 1582 PetscErrorCode ierr; 1583 1584 PetscFunctionBegin; 1585 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1586 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1587 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1588 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1589 1590 /* Invert row permutation to find out where my rows should go */ 1591 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1592 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1593 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1594 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1595 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1596 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1597 1598 /* Invert column permutation to find out where my columns should go */ 1599 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1600 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1601 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1602 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1603 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1604 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1606 1607 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1608 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1609 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1610 1611 /* Find out where my gcols should go */ 1612 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1613 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1614 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1615 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1616 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1617 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1618 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1619 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1620 1621 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1622 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1623 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1624 for (i=0; i<m; i++) { 1625 PetscInt row = rdest[i],rowner; 1626 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1627 for (j=ai[i]; j<ai[i+1]; j++) { 1628 PetscInt cowner,col = cdest[aj[j]]; 1629 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1630 if (rowner == cowner) dnnz[i]++; 1631 else onnz[i]++; 1632 } 1633 for (j=bi[i]; j<bi[i+1]; j++) { 1634 PetscInt cowner,col = gcdest[bj[j]]; 1635 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1636 if (rowner == cowner) dnnz[i]++; 1637 else onnz[i]++; 1638 } 1639 } 1640 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1641 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1643 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1644 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1645 1646 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1647 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1648 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1649 for (i=0; i<m; i++) { 1650 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1651 PetscInt j0,rowlen; 1652 rowlen = ai[i+1] - ai[i]; 1653 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1654 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1655 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1656 } 1657 rowlen = bi[i+1] - bi[i]; 1658 for (j0=j=0; j<rowlen; j0=j) { 1659 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1660 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1661 } 1662 } 1663 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1664 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1665 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1666 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1667 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1668 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1669 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1670 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1671 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1672 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1673 *B = Aperm; 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1679 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1680 { 1681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1682 PetscErrorCode ierr; 1683 1684 PetscFunctionBegin; 1685 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1686 if (ghosts) *ghosts = aij->garray; 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1692 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1693 { 1694 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1695 Mat A = mat->A,B = mat->B; 1696 PetscErrorCode ierr; 1697 PetscReal isend[5],irecv[5]; 1698 1699 PetscFunctionBegin; 1700 info->block_size = 1.0; 1701 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1702 1703 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1704 isend[3] = info->memory; isend[4] = info->mallocs; 1705 1706 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1707 1708 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1709 isend[3] += info->memory; isend[4] += info->mallocs; 1710 if (flag == MAT_LOCAL) { 1711 info->nz_used = isend[0]; 1712 info->nz_allocated = isend[1]; 1713 info->nz_unneeded = isend[2]; 1714 info->memory = isend[3]; 1715 info->mallocs = isend[4]; 1716 } else if (flag == MAT_GLOBAL_MAX) { 1717 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1718 1719 info->nz_used = irecv[0]; 1720 info->nz_allocated = irecv[1]; 1721 info->nz_unneeded = irecv[2]; 1722 info->memory = irecv[3]; 1723 info->mallocs = irecv[4]; 1724 } else if (flag == MAT_GLOBAL_SUM) { 1725 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1726 1727 info->nz_used = irecv[0]; 1728 info->nz_allocated = irecv[1]; 1729 info->nz_unneeded = irecv[2]; 1730 info->memory = irecv[3]; 1731 info->mallocs = irecv[4]; 1732 } 1733 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1734 info->fill_ratio_needed = 0; 1735 info->factor_mallocs = 0; 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatSetOption_MPIAIJ" 1741 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1742 { 1743 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 switch (op) { 1748 case MAT_NEW_NONZERO_LOCATIONS: 1749 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1750 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1751 case MAT_KEEP_NONZERO_PATTERN: 1752 case MAT_NEW_NONZERO_LOCATION_ERR: 1753 case MAT_USE_INODES: 1754 case MAT_IGNORE_ZERO_ENTRIES: 1755 MatCheckPreallocated(A,1); 1756 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1757 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_ROW_ORIENTED: 1760 a->roworiented = flg; 1761 1762 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1763 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_NEW_DIAGONALS: 1766 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1767 break; 1768 case MAT_IGNORE_OFF_PROC_ENTRIES: 1769 a->donotstash = flg; 1770 break; 1771 case MAT_SPD: 1772 A->spd_set = PETSC_TRUE; 1773 A->spd = flg; 1774 if (flg) { 1775 A->symmetric = PETSC_TRUE; 1776 A->structurally_symmetric = PETSC_TRUE; 1777 A->symmetric_set = PETSC_TRUE; 1778 A->structurally_symmetric_set = PETSC_TRUE; 1779 } 1780 break; 1781 case MAT_SYMMETRIC: 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 case MAT_STRUCTURALLY_SYMMETRIC: 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_HERMITIAN: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_SYMMETRY_ETERNAL: 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 break; 1793 default: 1794 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatGetRow_MPIAIJ" 1801 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1802 { 1803 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1804 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1805 PetscErrorCode ierr; 1806 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1807 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1808 PetscInt *cmap,*idx_p; 1809 1810 PetscFunctionBegin; 1811 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1812 mat->getrowactive = PETSC_TRUE; 1813 1814 if (!mat->rowvalues && (idx || v)) { 1815 /* 1816 allocate enough space to hold information from the longest row. 1817 */ 1818 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1819 PetscInt max = 1,tmp; 1820 for (i=0; i<matin->rmap->n; i++) { 1821 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1822 if (max < tmp) max = tmp; 1823 } 1824 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1825 } 1826 1827 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1828 lrow = row - rstart; 1829 1830 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1831 if (!v) {pvA = 0; pvB = 0;} 1832 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1833 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1834 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1835 nztot = nzA + nzB; 1836 1837 cmap = mat->garray; 1838 if (v || idx) { 1839 if (nztot) { 1840 /* Sort by increasing column numbers, assuming A and B already sorted */ 1841 PetscInt imark = -1; 1842 if (v) { 1843 *v = v_p = mat->rowvalues; 1844 for (i=0; i<nzB; i++) { 1845 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1846 else break; 1847 } 1848 imark = i; 1849 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1850 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1851 } 1852 if (idx) { 1853 *idx = idx_p = mat->rowindices; 1854 if (imark > -1) { 1855 for (i=0; i<imark; i++) { 1856 idx_p[i] = cmap[cworkB[i]]; 1857 } 1858 } else { 1859 for (i=0; i<nzB; i++) { 1860 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1861 else break; 1862 } 1863 imark = i; 1864 } 1865 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1866 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1867 } 1868 } else { 1869 if (idx) *idx = 0; 1870 if (v) *v = 0; 1871 } 1872 } 1873 *nz = nztot; 1874 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1875 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1881 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1882 { 1883 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1884 1885 PetscFunctionBegin; 1886 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1887 aij->getrowactive = PETSC_FALSE; 1888 PetscFunctionReturn(0); 1889 } 1890 1891 #undef __FUNCT__ 1892 #define __FUNCT__ "MatNorm_MPIAIJ" 1893 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1894 { 1895 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1896 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1897 PetscErrorCode ierr; 1898 PetscInt i,j,cstart = mat->cmap->rstart; 1899 PetscReal sum = 0.0; 1900 MatScalar *v; 1901 1902 PetscFunctionBegin; 1903 if (aij->size == 1) { 1904 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1905 } else { 1906 if (type == NORM_FROBENIUS) { 1907 v = amat->a; 1908 for (i=0; i<amat->nz; i++) { 1909 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1910 } 1911 v = bmat->a; 1912 for (i=0; i<bmat->nz; i++) { 1913 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1914 } 1915 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1916 *norm = PetscSqrtReal(*norm); 1917 } else if (type == NORM_1) { /* max column norm */ 1918 PetscReal *tmp,*tmp2; 1919 PetscInt *jj,*garray = aij->garray; 1920 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1921 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1922 *norm = 0.0; 1923 v = amat->a; jj = amat->j; 1924 for (j=0; j<amat->nz; j++) { 1925 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1926 } 1927 v = bmat->a; jj = bmat->j; 1928 for (j=0; j<bmat->nz; j++) { 1929 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1930 } 1931 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1932 for (j=0; j<mat->cmap->N; j++) { 1933 if (tmp2[j] > *norm) *norm = tmp2[j]; 1934 } 1935 ierr = PetscFree(tmp);CHKERRQ(ierr); 1936 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1937 } else if (type == NORM_INFINITY) { /* max row norm */ 1938 PetscReal ntemp = 0.0; 1939 for (j=0; j<aij->A->rmap->n; j++) { 1940 v = amat->a + amat->i[j]; 1941 sum = 0.0; 1942 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1943 sum += PetscAbsScalar(*v); v++; 1944 } 1945 v = bmat->a + bmat->i[j]; 1946 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1947 sum += PetscAbsScalar(*v); v++; 1948 } 1949 if (sum > ntemp) ntemp = sum; 1950 } 1951 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1952 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1953 } 1954 PetscFunctionReturn(0); 1955 } 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatTranspose_MPIAIJ" 1959 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1960 { 1961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1962 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1963 PetscErrorCode ierr; 1964 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1965 PetscInt cstart = A->cmap->rstart,ncol; 1966 Mat B; 1967 MatScalar *array; 1968 1969 PetscFunctionBegin; 1970 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1971 1972 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1973 ai = Aloc->i; aj = Aloc->j; 1974 bi = Bloc->i; bj = Bloc->j; 1975 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1976 PetscInt *d_nnz,*g_nnz,*o_nnz; 1977 PetscSFNode *oloc; 1978 PETSC_UNUSED PetscSF sf; 1979 1980 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1981 /* compute d_nnz for preallocation */ 1982 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1983 for (i=0; i<ai[ma]; i++) { 1984 d_nnz[aj[i]]++; 1985 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1986 } 1987 /* compute local off-diagonal contributions */ 1988 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1989 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1990 /* map those to global */ 1991 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1992 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1993 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1994 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1995 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1996 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1997 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1998 1999 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2000 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2001 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2002 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2003 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2004 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2005 } else { 2006 B = *matout; 2007 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2008 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2009 } 2010 2011 /* copy over the A part */ 2012 array = Aloc->a; 2013 row = A->rmap->rstart; 2014 for (i=0; i<ma; i++) { 2015 ncol = ai[i+1]-ai[i]; 2016 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2017 row++; 2018 array += ncol; aj += ncol; 2019 } 2020 aj = Aloc->j; 2021 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2022 2023 /* copy over the B part */ 2024 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2025 array = Bloc->a; 2026 row = A->rmap->rstart; 2027 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2028 cols_tmp = cols; 2029 for (i=0; i<mb; i++) { 2030 ncol = bi[i+1]-bi[i]; 2031 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2032 row++; 2033 array += ncol; cols_tmp += ncol; 2034 } 2035 ierr = PetscFree(cols);CHKERRQ(ierr); 2036 2037 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2040 *matout = B; 2041 } else { 2042 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2043 } 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2049 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2050 { 2051 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2052 Mat a = aij->A,b = aij->B; 2053 PetscErrorCode ierr; 2054 PetscInt s1,s2,s3; 2055 2056 PetscFunctionBegin; 2057 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2058 if (rr) { 2059 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2060 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2061 /* Overlap communication with computation. */ 2062 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2063 } 2064 if (ll) { 2065 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2066 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2067 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2068 } 2069 /* scale the diagonal block */ 2070 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2071 2072 if (rr) { 2073 /* Do a scatter end and then right scale the off-diagonal block */ 2074 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2075 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2076 } 2077 PetscFunctionReturn(0); 2078 } 2079 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2082 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2083 { 2084 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2085 PetscErrorCode ierr; 2086 2087 PetscFunctionBegin; 2088 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatEqual_MPIAIJ" 2094 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2095 { 2096 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2097 Mat a,b,c,d; 2098 PetscBool flg; 2099 PetscErrorCode ierr; 2100 2101 PetscFunctionBegin; 2102 a = matA->A; b = matA->B; 2103 c = matB->A; d = matB->B; 2104 2105 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2106 if (flg) { 2107 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2108 } 2109 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2110 PetscFunctionReturn(0); 2111 } 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "MatCopy_MPIAIJ" 2115 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2116 { 2117 PetscErrorCode ierr; 2118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2119 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2120 2121 PetscFunctionBegin; 2122 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2123 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2124 /* because of the column compression in the off-processor part of the matrix a->B, 2125 the number of columns in a->B and b->B may be different, hence we cannot call 2126 the MatCopy() directly on the two parts. If need be, we can provide a more 2127 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2128 then copying the submatrices */ 2129 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2130 } else { 2131 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2132 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatSetUp_MPIAIJ" 2139 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2140 { 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 /* 2149 Computes the number of nonzeros per row needed for preallocation when X and Y 2150 have different nonzero structure. 2151 */ 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2154 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2155 { 2156 PetscInt i,j,k,nzx,nzy; 2157 2158 PetscFunctionBegin; 2159 /* Set the number of nonzeros in the new matrix */ 2160 for (i=0; i<m; i++) { 2161 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2162 nzx = xi[i+1] - xi[i]; 2163 nzy = yi[i+1] - yi[i]; 2164 nnz[i] = 0; 2165 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2166 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2167 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2168 nnz[i]++; 2169 } 2170 for (; k<nzy; k++) nnz[i]++; 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2178 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2179 { 2180 PetscErrorCode ierr; 2181 PetscInt m = Y->rmap->N; 2182 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2183 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2184 2185 PetscFunctionBegin; 2186 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 #undef __FUNCT__ 2191 #define __FUNCT__ "MatAXPY_MPIAIJ" 2192 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2193 { 2194 PetscErrorCode ierr; 2195 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2196 PetscBLASInt bnz,one=1; 2197 Mat_SeqAIJ *x,*y; 2198 2199 PetscFunctionBegin; 2200 if (str == SAME_NONZERO_PATTERN) { 2201 PetscScalar alpha = a; 2202 x = (Mat_SeqAIJ*)xx->A->data; 2203 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2204 y = (Mat_SeqAIJ*)yy->A->data; 2205 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2206 x = (Mat_SeqAIJ*)xx->B->data; 2207 y = (Mat_SeqAIJ*)yy->B->data; 2208 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2209 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2210 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2211 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2212 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2213 } else { 2214 Mat B; 2215 PetscInt *nnz_d,*nnz_o; 2216 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2217 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2218 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2219 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2220 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2221 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2222 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2223 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2224 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2225 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2226 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2227 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2228 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2229 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2230 } 2231 PetscFunctionReturn(0); 2232 } 2233 2234 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatConjugate_MPIAIJ" 2238 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2239 { 2240 #if defined(PETSC_USE_COMPLEX) 2241 PetscErrorCode ierr; 2242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2243 2244 PetscFunctionBegin; 2245 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2246 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2247 #else 2248 PetscFunctionBegin; 2249 #endif 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatRealPart_MPIAIJ" 2255 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2256 { 2257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2258 PetscErrorCode ierr; 2259 2260 PetscFunctionBegin; 2261 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2262 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2268 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2269 { 2270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2271 PetscErrorCode ierr; 2272 2273 PetscFunctionBegin; 2274 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2275 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2281 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2282 { 2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2284 PetscErrorCode ierr; 2285 PetscInt i,*idxb = 0; 2286 PetscScalar *va,*vb; 2287 Vec vtmp; 2288 2289 PetscFunctionBegin; 2290 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2291 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2292 if (idx) { 2293 for (i=0; i<A->rmap->n; i++) { 2294 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2295 } 2296 } 2297 2298 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2299 if (idx) { 2300 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2301 } 2302 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2303 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2304 2305 for (i=0; i<A->rmap->n; i++) { 2306 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2307 va[i] = vb[i]; 2308 if (idx) idx[i] = a->garray[idxb[i]]; 2309 } 2310 } 2311 2312 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2313 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2314 ierr = PetscFree(idxb);CHKERRQ(ierr); 2315 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 #undef __FUNCT__ 2320 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2321 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2322 { 2323 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2324 PetscErrorCode ierr; 2325 PetscInt i,*idxb = 0; 2326 PetscScalar *va,*vb; 2327 Vec vtmp; 2328 2329 PetscFunctionBegin; 2330 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2331 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2332 if (idx) { 2333 for (i=0; i<A->cmap->n; i++) { 2334 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2335 } 2336 } 2337 2338 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2339 if (idx) { 2340 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2341 } 2342 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2343 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2344 2345 for (i=0; i<A->rmap->n; i++) { 2346 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2347 va[i] = vb[i]; 2348 if (idx) idx[i] = a->garray[idxb[i]]; 2349 } 2350 } 2351 2352 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2354 ierr = PetscFree(idxb);CHKERRQ(ierr); 2355 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2356 PetscFunctionReturn(0); 2357 } 2358 2359 #undef __FUNCT__ 2360 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2361 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2362 { 2363 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2364 PetscInt n = A->rmap->n; 2365 PetscInt cstart = A->cmap->rstart; 2366 PetscInt *cmap = mat->garray; 2367 PetscInt *diagIdx, *offdiagIdx; 2368 Vec diagV, offdiagV; 2369 PetscScalar *a, *diagA, *offdiagA; 2370 PetscInt r; 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2375 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2376 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2377 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2378 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2379 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2380 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2381 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2382 for (r = 0; r < n; ++r) { 2383 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2384 a[r] = diagA[r]; 2385 idx[r] = cstart + diagIdx[r]; 2386 } else { 2387 a[r] = offdiagA[r]; 2388 idx[r] = cmap[offdiagIdx[r]]; 2389 } 2390 } 2391 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2392 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2393 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2394 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2395 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2396 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2402 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2403 { 2404 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2405 PetscInt n = A->rmap->n; 2406 PetscInt cstart = A->cmap->rstart; 2407 PetscInt *cmap = mat->garray; 2408 PetscInt *diagIdx, *offdiagIdx; 2409 Vec diagV, offdiagV; 2410 PetscScalar *a, *diagA, *offdiagA; 2411 PetscInt r; 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2416 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2417 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2418 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2419 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2420 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2421 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2422 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2423 for (r = 0; r < n; ++r) { 2424 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2425 a[r] = diagA[r]; 2426 idx[r] = cstart + diagIdx[r]; 2427 } else { 2428 a[r] = offdiagA[r]; 2429 idx[r] = cmap[offdiagIdx[r]]; 2430 } 2431 } 2432 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2433 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2434 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2435 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2436 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2437 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 #undef __FUNCT__ 2442 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2443 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2444 { 2445 PetscErrorCode ierr; 2446 Mat *dummy; 2447 2448 PetscFunctionBegin; 2449 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2450 *newmat = *dummy; 2451 ierr = PetscFree(dummy);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2457 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2458 { 2459 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2460 PetscErrorCode ierr; 2461 2462 PetscFunctionBegin; 2463 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2464 A->errortype = a->A->errortype; 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2470 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2471 { 2472 PetscErrorCode ierr; 2473 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2474 2475 PetscFunctionBegin; 2476 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2477 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2478 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2479 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2485 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2486 { 2487 PetscFunctionBegin; 2488 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2489 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #undef __FUNCT__ 2494 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2495 /*@ 2496 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2497 2498 Collective on Mat 2499 2500 Input Parameters: 2501 + A - the matrix 2502 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2503 2504 Level: advanced 2505 2506 @*/ 2507 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2508 { 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2518 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2519 { 2520 PetscErrorCode ierr; 2521 PetscBool sc = PETSC_FALSE,flg; 2522 2523 PetscFunctionBegin; 2524 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2525 ierr = PetscObjectOptionsBegin((PetscObject)A); 2526 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2527 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2528 if (flg) { 2529 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2530 } 2531 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #undef __FUNCT__ 2536 #define __FUNCT__ "MatShift_MPIAIJ" 2537 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2538 { 2539 PetscErrorCode ierr; 2540 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2541 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2542 2543 PetscFunctionBegin; 2544 if (!Y->preallocated) { 2545 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2546 } else if (!aij->nz) { 2547 PetscInt nonew = aij->nonew; 2548 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2549 aij->nonew = nonew; 2550 } 2551 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2557 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2558 { 2559 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2564 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2565 if (d) { 2566 PetscInt rstart; 2567 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2568 *d += rstart; 2569 2570 } 2571 PetscFunctionReturn(0); 2572 } 2573 2574 2575 /* -------------------------------------------------------------------*/ 2576 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2577 MatGetRow_MPIAIJ, 2578 MatRestoreRow_MPIAIJ, 2579 MatMult_MPIAIJ, 2580 /* 4*/ MatMultAdd_MPIAIJ, 2581 MatMultTranspose_MPIAIJ, 2582 MatMultTransposeAdd_MPIAIJ, 2583 0, 2584 0, 2585 0, 2586 /*10*/ 0, 2587 0, 2588 0, 2589 MatSOR_MPIAIJ, 2590 MatTranspose_MPIAIJ, 2591 /*15*/ MatGetInfo_MPIAIJ, 2592 MatEqual_MPIAIJ, 2593 MatGetDiagonal_MPIAIJ, 2594 MatDiagonalScale_MPIAIJ, 2595 MatNorm_MPIAIJ, 2596 /*20*/ MatAssemblyBegin_MPIAIJ, 2597 MatAssemblyEnd_MPIAIJ, 2598 MatSetOption_MPIAIJ, 2599 MatZeroEntries_MPIAIJ, 2600 /*24*/ MatZeroRows_MPIAIJ, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*29*/ MatSetUp_MPIAIJ, 2606 0, 2607 0, 2608 0, 2609 0, 2610 /*34*/ MatDuplicate_MPIAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*39*/ MatAXPY_MPIAIJ, 2616 MatGetSubMatrices_MPIAIJ, 2617 MatIncreaseOverlap_MPIAIJ, 2618 MatGetValues_MPIAIJ, 2619 MatCopy_MPIAIJ, 2620 /*44*/ MatGetRowMax_MPIAIJ, 2621 MatScale_MPIAIJ, 2622 MatShift_MPIAIJ, 2623 MatDiagonalSet_MPIAIJ, 2624 MatZeroRowsColumns_MPIAIJ, 2625 /*49*/ MatSetRandom_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*54*/ MatFDColoringCreate_MPIXAIJ, 2631 0, 2632 MatSetUnfactored_MPIAIJ, 2633 MatPermute_MPIAIJ, 2634 0, 2635 /*59*/ MatGetSubMatrix_MPIAIJ, 2636 MatDestroy_MPIAIJ, 2637 MatView_MPIAIJ, 2638 0, 2639 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2640 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2641 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2642 0, 2643 0, 2644 0, 2645 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2646 MatGetRowMinAbs_MPIAIJ, 2647 0, 2648 MatSetColoring_MPIAIJ, 2649 0, 2650 MatSetValuesAdifor_MPIAIJ, 2651 /*75*/ MatFDColoringApply_AIJ, 2652 MatSetFromOptions_MPIAIJ, 2653 0, 2654 0, 2655 MatFindZeroDiagonals_MPIAIJ, 2656 /*80*/ 0, 2657 0, 2658 0, 2659 /*83*/ MatLoad_MPIAIJ, 2660 0, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2666 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2667 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2668 MatPtAP_MPIAIJ_MPIAIJ, 2669 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2670 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*99*/ 0, 2676 0, 2677 0, 2678 MatConjugate_MPIAIJ, 2679 0, 2680 /*104*/MatSetValuesRow_MPIAIJ, 2681 MatRealPart_MPIAIJ, 2682 MatImaginaryPart_MPIAIJ, 2683 0, 2684 0, 2685 /*109*/0, 2686 0, 2687 MatGetRowMin_MPIAIJ, 2688 0, 2689 MatMissingDiagonal_MPIAIJ, 2690 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2691 0, 2692 MatGetGhosts_MPIAIJ, 2693 0, 2694 0, 2695 /*119*/0, 2696 0, 2697 0, 2698 0, 2699 MatGetMultiProcBlock_MPIAIJ, 2700 /*124*/MatFindNonzeroRows_MPIAIJ, 2701 MatGetColumnNorms_MPIAIJ, 2702 MatInvertBlockDiagonal_MPIAIJ, 2703 0, 2704 MatGetSubMatricesMPI_MPIAIJ, 2705 /*129*/0, 2706 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2707 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2708 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2709 0, 2710 /*134*/0, 2711 0, 2712 0, 2713 0, 2714 0, 2715 /*139*/0, 2716 0, 2717 0, 2718 MatFDColoringSetUp_MPIXAIJ, 2719 MatFindOffBlockDiagonalEntries_MPIAIJ, 2720 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2721 }; 2722 2723 /* ----------------------------------------------------------------------------------------*/ 2724 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2727 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2728 { 2729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2734 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2740 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2741 { 2742 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2747 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2753 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2754 { 2755 Mat_MPIAIJ *b; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2760 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2761 b = (Mat_MPIAIJ*)B->data; 2762 2763 if (!B->preallocated) { 2764 /* Explicitly create 2 MATSEQAIJ matrices. */ 2765 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2766 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2767 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2768 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2770 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2771 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2772 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2773 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2774 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2775 } 2776 2777 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2778 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2779 B->preallocated = PETSC_TRUE; 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2785 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2786 { 2787 Mat mat; 2788 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 *newmat = 0; 2793 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2794 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2795 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2796 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2797 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2798 a = (Mat_MPIAIJ*)mat->data; 2799 2800 mat->factortype = matin->factortype; 2801 mat->assembled = PETSC_TRUE; 2802 mat->insertmode = NOT_SET_VALUES; 2803 mat->preallocated = PETSC_TRUE; 2804 2805 a->size = oldmat->size; 2806 a->rank = oldmat->rank; 2807 a->donotstash = oldmat->donotstash; 2808 a->roworiented = oldmat->roworiented; 2809 a->rowindices = 0; 2810 a->rowvalues = 0; 2811 a->getrowactive = PETSC_FALSE; 2812 2813 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2814 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2815 2816 if (oldmat->colmap) { 2817 #if defined(PETSC_USE_CTABLE) 2818 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2819 #else 2820 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2821 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2822 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2823 #endif 2824 } else a->colmap = 0; 2825 if (oldmat->garray) { 2826 PetscInt len; 2827 len = oldmat->B->cmap->n; 2828 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2829 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2830 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2831 } else a->garray = 0; 2832 2833 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2834 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2835 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2837 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2838 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2839 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2841 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2842 *newmat = mat; 2843 PetscFunctionReturn(0); 2844 } 2845 2846 2847 2848 #undef __FUNCT__ 2849 #define __FUNCT__ "MatLoad_MPIAIJ" 2850 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2851 { 2852 PetscScalar *vals,*svals; 2853 MPI_Comm comm; 2854 PetscErrorCode ierr; 2855 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2856 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2857 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2858 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2859 PetscInt cend,cstart,n,*rowners; 2860 int fd; 2861 PetscInt bs = newMat->rmap->bs; 2862 2863 PetscFunctionBegin; 2864 /* force binary viewer to load .info file if it has not yet done so */ 2865 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2866 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2867 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2868 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2869 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2870 if (!rank) { 2871 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2872 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2873 } 2874 2875 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2876 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2877 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2878 if (bs < 0) bs = 1; 2879 2880 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2881 M = header[1]; N = header[2]; 2882 2883 /* If global sizes are set, check if they are consistent with that given in the file */ 2884 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2885 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2886 2887 /* determine ownership of all (block) rows */ 2888 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2889 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2890 else m = newMat->rmap->n; /* Set by user */ 2891 2892 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2893 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2894 2895 /* First process needs enough room for process with most rows */ 2896 if (!rank) { 2897 mmax = rowners[1]; 2898 for (i=2; i<=size; i++) { 2899 mmax = PetscMax(mmax, rowners[i]); 2900 } 2901 } else mmax = -1; /* unused, but compilers complain */ 2902 2903 rowners[0] = 0; 2904 for (i=2; i<=size; i++) { 2905 rowners[i] += rowners[i-1]; 2906 } 2907 rstart = rowners[rank]; 2908 rend = rowners[rank+1]; 2909 2910 /* distribute row lengths to all processors */ 2911 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2912 if (!rank) { 2913 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2914 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2915 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2916 for (j=0; j<m; j++) { 2917 procsnz[0] += ourlens[j]; 2918 } 2919 for (i=1; i<size; i++) { 2920 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2921 /* calculate the number of nonzeros on each processor */ 2922 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2923 procsnz[i] += rowlengths[j]; 2924 } 2925 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926 } 2927 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928 } else { 2929 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2930 } 2931 2932 if (!rank) { 2933 /* determine max buffer needed and allocate it */ 2934 maxnz = 0; 2935 for (i=0; i<size; i++) { 2936 maxnz = PetscMax(maxnz,procsnz[i]); 2937 } 2938 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2939 2940 /* read in my part of the matrix column indices */ 2941 nz = procsnz[0]; 2942 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2943 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2944 2945 /* read in every one elses and ship off */ 2946 for (i=1; i<size; i++) { 2947 nz = procsnz[i]; 2948 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2949 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2950 } 2951 ierr = PetscFree(cols);CHKERRQ(ierr); 2952 } else { 2953 /* determine buffer space needed for message */ 2954 nz = 0; 2955 for (i=0; i<m; i++) { 2956 nz += ourlens[i]; 2957 } 2958 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2959 2960 /* receive message of column indices*/ 2961 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2962 } 2963 2964 /* determine column ownership if matrix is not square */ 2965 if (N != M) { 2966 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2967 else n = newMat->cmap->n; 2968 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2969 cstart = cend - n; 2970 } else { 2971 cstart = rstart; 2972 cend = rend; 2973 n = cend - cstart; 2974 } 2975 2976 /* loop over local rows, determining number of off diagonal entries */ 2977 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2978 jj = 0; 2979 for (i=0; i<m; i++) { 2980 for (j=0; j<ourlens[i]; j++) { 2981 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2982 jj++; 2983 } 2984 } 2985 2986 for (i=0; i<m; i++) { 2987 ourlens[i] -= offlens[i]; 2988 } 2989 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2990 2991 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2992 2993 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2994 2995 for (i=0; i<m; i++) { 2996 ourlens[i] += offlens[i]; 2997 } 2998 2999 if (!rank) { 3000 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3001 3002 /* read in my part of the matrix numerical values */ 3003 nz = procsnz[0]; 3004 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3005 3006 /* insert into matrix */ 3007 jj = rstart; 3008 smycols = mycols; 3009 svals = vals; 3010 for (i=0; i<m; i++) { 3011 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3012 smycols += ourlens[i]; 3013 svals += ourlens[i]; 3014 jj++; 3015 } 3016 3017 /* read in other processors and ship out */ 3018 for (i=1; i<size; i++) { 3019 nz = procsnz[i]; 3020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3021 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3022 } 3023 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3024 } else { 3025 /* receive numeric values */ 3026 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3027 3028 /* receive message of values*/ 3029 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3030 3031 /* insert into matrix */ 3032 jj = rstart; 3033 smycols = mycols; 3034 svals = vals; 3035 for (i=0; i<m; i++) { 3036 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3037 smycols += ourlens[i]; 3038 svals += ourlens[i]; 3039 jj++; 3040 } 3041 } 3042 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3043 ierr = PetscFree(vals);CHKERRQ(ierr); 3044 ierr = PetscFree(mycols);CHKERRQ(ierr); 3045 ierr = PetscFree(rowners);CHKERRQ(ierr); 3046 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 #undef __FUNCT__ 3052 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3053 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3054 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3055 { 3056 PetscErrorCode ierr; 3057 IS iscol_local; 3058 PetscInt csize; 3059 3060 PetscFunctionBegin; 3061 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3062 if (call == MAT_REUSE_MATRIX) { 3063 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3064 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3065 } else { 3066 /* check if we are grabbing all columns*/ 3067 PetscBool isstride; 3068 PetscMPIInt lisstride = 0,gisstride; 3069 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3070 if (isstride) { 3071 PetscInt start,len,mstart,mlen; 3072 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3073 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3074 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3075 if (mstart == start && mlen-mstart == len) lisstride = 1; 3076 } 3077 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3078 if (gisstride) { 3079 PetscInt N; 3080 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3081 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3082 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3083 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3084 } else { 3085 PetscInt cbs; 3086 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3087 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3088 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3089 } 3090 } 3091 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3092 if (call == MAT_INITIAL_MATRIX) { 3093 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3094 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3095 } 3096 PetscFunctionReturn(0); 3097 } 3098 3099 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3100 #undef __FUNCT__ 3101 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3102 /* 3103 Not great since it makes two copies of the submatrix, first an SeqAIJ 3104 in local and then by concatenating the local matrices the end result. 3105 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3106 3107 Note: This requires a sequential iscol with all indices. 3108 */ 3109 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3110 { 3111 PetscErrorCode ierr; 3112 PetscMPIInt rank,size; 3113 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3114 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3115 PetscBool allcolumns, colflag; 3116 Mat M,Mreuse; 3117 MatScalar *vwork,*aa; 3118 MPI_Comm comm; 3119 Mat_SeqAIJ *aij; 3120 3121 PetscFunctionBegin; 3122 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3124 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3125 3126 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3127 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3128 if (colflag && ncol == mat->cmap->N) { 3129 allcolumns = PETSC_TRUE; 3130 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3131 } else { 3132 allcolumns = PETSC_FALSE; 3133 } 3134 if (call == MAT_REUSE_MATRIX) { 3135 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3136 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3137 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3138 } else { 3139 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3140 } 3141 3142 /* 3143 m - number of local rows 3144 n - number of columns (same on all processors) 3145 rstart - first row in new global matrix generated 3146 */ 3147 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3148 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3149 if (call == MAT_INITIAL_MATRIX) { 3150 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3151 ii = aij->i; 3152 jj = aij->j; 3153 3154 /* 3155 Determine the number of non-zeros in the diagonal and off-diagonal 3156 portions of the matrix in order to do correct preallocation 3157 */ 3158 3159 /* first get start and end of "diagonal" columns */ 3160 if (csize == PETSC_DECIDE) { 3161 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3162 if (mglobal == n) { /* square matrix */ 3163 nlocal = m; 3164 } else { 3165 nlocal = n/size + ((n % size) > rank); 3166 } 3167 } else { 3168 nlocal = csize; 3169 } 3170 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3171 rstart = rend - nlocal; 3172 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3173 3174 /* next, compute all the lengths */ 3175 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3176 olens = dlens + m; 3177 for (i=0; i<m; i++) { 3178 jend = ii[i+1] - ii[i]; 3179 olen = 0; 3180 dlen = 0; 3181 for (j=0; j<jend; j++) { 3182 if (*jj < rstart || *jj >= rend) olen++; 3183 else dlen++; 3184 jj++; 3185 } 3186 olens[i] = olen; 3187 dlens[i] = dlen; 3188 } 3189 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3190 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3191 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3192 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3193 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3194 ierr = PetscFree(dlens);CHKERRQ(ierr); 3195 } else { 3196 PetscInt ml,nl; 3197 3198 M = *newmat; 3199 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3200 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3201 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3202 /* 3203 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3204 rather than the slower MatSetValues(). 3205 */ 3206 M->was_assembled = PETSC_TRUE; 3207 M->assembled = PETSC_FALSE; 3208 } 3209 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3210 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3211 ii = aij->i; 3212 jj = aij->j; 3213 aa = aij->a; 3214 for (i=0; i<m; i++) { 3215 row = rstart + i; 3216 nz = ii[i+1] - ii[i]; 3217 cwork = jj; jj += nz; 3218 vwork = aa; aa += nz; 3219 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3220 } 3221 3222 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3223 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3224 *newmat = M; 3225 3226 /* save submatrix used in processor for next request */ 3227 if (call == MAT_INITIAL_MATRIX) { 3228 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3229 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3230 } 3231 PetscFunctionReturn(0); 3232 } 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3236 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3237 { 3238 PetscInt m,cstart, cend,j,nnz,i,d; 3239 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3240 const PetscInt *JJ; 3241 PetscScalar *values; 3242 PetscErrorCode ierr; 3243 3244 PetscFunctionBegin; 3245 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3246 3247 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3248 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3249 m = B->rmap->n; 3250 cstart = B->cmap->rstart; 3251 cend = B->cmap->rend; 3252 rstart = B->rmap->rstart; 3253 3254 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3255 3256 #if defined(PETSC_USE_DEBUGGING) 3257 for (i=0; i<m; i++) { 3258 nnz = Ii[i+1]- Ii[i]; 3259 JJ = J + Ii[i]; 3260 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3261 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3262 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3263 } 3264 #endif 3265 3266 for (i=0; i<m; i++) { 3267 nnz = Ii[i+1]- Ii[i]; 3268 JJ = J + Ii[i]; 3269 nnz_max = PetscMax(nnz_max,nnz); 3270 d = 0; 3271 for (j=0; j<nnz; j++) { 3272 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3273 } 3274 d_nnz[i] = d; 3275 o_nnz[i] = nnz - d; 3276 } 3277 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3278 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3279 3280 if (v) values = (PetscScalar*)v; 3281 else { 3282 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3283 } 3284 3285 for (i=0; i<m; i++) { 3286 ii = i + rstart; 3287 nnz = Ii[i+1]- Ii[i]; 3288 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3289 } 3290 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3291 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3292 3293 if (!v) { 3294 ierr = PetscFree(values);CHKERRQ(ierr); 3295 } 3296 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3302 /*@ 3303 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3304 (the default parallel PETSc format). 3305 3306 Collective on MPI_Comm 3307 3308 Input Parameters: 3309 + B - the matrix 3310 . i - the indices into j for the start of each local row (starts with zero) 3311 . j - the column indices for each local row (starts with zero) 3312 - v - optional values in the matrix 3313 3314 Level: developer 3315 3316 Notes: 3317 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3318 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3319 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3320 3321 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3322 3323 The format which is used for the sparse matrix input, is equivalent to a 3324 row-major ordering.. i.e for the following matrix, the input data expected is 3325 as shown 3326 3327 $ 1 0 0 3328 $ 2 0 3 P0 3329 $ ------- 3330 $ 4 5 6 P1 3331 $ 3332 $ Process0 [P0]: rows_owned=[0,1] 3333 $ i = {0,1,3} [size = nrow+1 = 2+1] 3334 $ j = {0,0,2} [size = 3] 3335 $ v = {1,2,3} [size = 3] 3336 $ 3337 $ Process1 [P1]: rows_owned=[2] 3338 $ i = {0,3} [size = nrow+1 = 1+1] 3339 $ j = {0,1,2} [size = 3] 3340 $ v = {4,5,6} [size = 3] 3341 3342 .keywords: matrix, aij, compressed row, sparse, parallel 3343 3344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3345 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3346 @*/ 3347 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3348 { 3349 PetscErrorCode ierr; 3350 3351 PetscFunctionBegin; 3352 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3353 PetscFunctionReturn(0); 3354 } 3355 3356 #undef __FUNCT__ 3357 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3358 /*@C 3359 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3360 (the default parallel PETSc format). For good matrix assembly performance 3361 the user should preallocate the matrix storage by setting the parameters 3362 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3363 performance can be increased by more than a factor of 50. 3364 3365 Collective on MPI_Comm 3366 3367 Input Parameters: 3368 + B - the matrix 3369 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3370 (same value is used for all local rows) 3371 . d_nnz - array containing the number of nonzeros in the various rows of the 3372 DIAGONAL portion of the local submatrix (possibly different for each row) 3373 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3374 The size of this array is equal to the number of local rows, i.e 'm'. 3375 For matrices that will be factored, you must leave room for (and set) 3376 the diagonal entry even if it is zero. 3377 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3378 submatrix (same value is used for all local rows). 3379 - o_nnz - array containing the number of nonzeros in the various rows of the 3380 OFF-DIAGONAL portion of the local submatrix (possibly different for 3381 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3382 structure. The size of this array is equal to the number 3383 of local rows, i.e 'm'. 3384 3385 If the *_nnz parameter is given then the *_nz parameter is ignored 3386 3387 The AIJ format (also called the Yale sparse matrix format or 3388 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3389 storage. The stored row and column indices begin with zero. 3390 See Users-Manual: ch_mat for details. 3391 3392 The parallel matrix is partitioned such that the first m0 rows belong to 3393 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3394 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3395 3396 The DIAGONAL portion of the local submatrix of a processor can be defined 3397 as the submatrix which is obtained by extraction the part corresponding to 3398 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3399 first row that belongs to the processor, r2 is the last row belonging to 3400 the this processor, and c1-c2 is range of indices of the local part of a 3401 vector suitable for applying the matrix to. This is an mxn matrix. In the 3402 common case of a square matrix, the row and column ranges are the same and 3403 the DIAGONAL part is also square. The remaining portion of the local 3404 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3405 3406 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3407 3408 You can call MatGetInfo() to get information on how effective the preallocation was; 3409 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3410 You can also run with the option -info and look for messages with the string 3411 malloc in them to see if additional memory allocation was needed. 3412 3413 Example usage: 3414 3415 Consider the following 8x8 matrix with 34 non-zero values, that is 3416 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3417 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3418 as follows: 3419 3420 .vb 3421 1 2 0 | 0 3 0 | 0 4 3422 Proc0 0 5 6 | 7 0 0 | 8 0 3423 9 0 10 | 11 0 0 | 12 0 3424 ------------------------------------- 3425 13 0 14 | 15 16 17 | 0 0 3426 Proc1 0 18 0 | 19 20 21 | 0 0 3427 0 0 0 | 22 23 0 | 24 0 3428 ------------------------------------- 3429 Proc2 25 26 27 | 0 0 28 | 29 0 3430 30 0 0 | 31 32 33 | 0 34 3431 .ve 3432 3433 This can be represented as a collection of submatrices as: 3434 3435 .vb 3436 A B C 3437 D E F 3438 G H I 3439 .ve 3440 3441 Where the submatrices A,B,C are owned by proc0, D,E,F are 3442 owned by proc1, G,H,I are owned by proc2. 3443 3444 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3445 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3446 The 'M','N' parameters are 8,8, and have the same values on all procs. 3447 3448 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3449 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3450 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3451 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3452 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3453 matrix, ans [DF] as another SeqAIJ matrix. 3454 3455 When d_nz, o_nz parameters are specified, d_nz storage elements are 3456 allocated for every row of the local diagonal submatrix, and o_nz 3457 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3458 One way to choose d_nz and o_nz is to use the max nonzerors per local 3459 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3460 In this case, the values of d_nz,o_nz are: 3461 .vb 3462 proc0 : dnz = 2, o_nz = 2 3463 proc1 : dnz = 3, o_nz = 2 3464 proc2 : dnz = 1, o_nz = 4 3465 .ve 3466 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3467 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3468 for proc3. i.e we are using 12+15+10=37 storage locations to store 3469 34 values. 3470 3471 When d_nnz, o_nnz parameters are specified, the storage is specified 3472 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3473 In the above case the values for d_nnz,o_nnz are: 3474 .vb 3475 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3476 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3477 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3478 .ve 3479 Here the space allocated is sum of all the above values i.e 34, and 3480 hence pre-allocation is perfect. 3481 3482 Level: intermediate 3483 3484 .keywords: matrix, aij, compressed row, sparse, parallel 3485 3486 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3487 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3488 @*/ 3489 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3490 { 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3495 PetscValidType(B,1); 3496 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3497 PetscFunctionReturn(0); 3498 } 3499 3500 #undef __FUNCT__ 3501 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3502 /*@ 3503 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3504 CSR format the local rows. 3505 3506 Collective on MPI_Comm 3507 3508 Input Parameters: 3509 + comm - MPI communicator 3510 . m - number of local rows (Cannot be PETSC_DECIDE) 3511 . n - This value should be the same as the local size used in creating the 3512 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3513 calculated if N is given) For square matrices n is almost always m. 3514 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3515 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3516 . i - row indices 3517 . j - column indices 3518 - a - matrix values 3519 3520 Output Parameter: 3521 . mat - the matrix 3522 3523 Level: intermediate 3524 3525 Notes: 3526 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3527 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3528 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3529 3530 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3531 3532 The format which is used for the sparse matrix input, is equivalent to a 3533 row-major ordering.. i.e for the following matrix, the input data expected is 3534 as shown 3535 3536 $ 1 0 0 3537 $ 2 0 3 P0 3538 $ ------- 3539 $ 4 5 6 P1 3540 $ 3541 $ Process0 [P0]: rows_owned=[0,1] 3542 $ i = {0,1,3} [size = nrow+1 = 2+1] 3543 $ j = {0,0,2} [size = 3] 3544 $ v = {1,2,3} [size = 3] 3545 $ 3546 $ Process1 [P1]: rows_owned=[2] 3547 $ i = {0,3} [size = nrow+1 = 1+1] 3548 $ j = {0,1,2} [size = 3] 3549 $ v = {4,5,6} [size = 3] 3550 3551 .keywords: matrix, aij, compressed row, sparse, parallel 3552 3553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3554 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3555 @*/ 3556 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3557 { 3558 PetscErrorCode ierr; 3559 3560 PetscFunctionBegin; 3561 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3562 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3563 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3564 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3565 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3566 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3567 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3568 PetscFunctionReturn(0); 3569 } 3570 3571 #undef __FUNCT__ 3572 #define __FUNCT__ "MatCreateAIJ" 3573 /*@C 3574 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3575 (the default parallel PETSc format). For good matrix assembly performance 3576 the user should preallocate the matrix storage by setting the parameters 3577 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3578 performance can be increased by more than a factor of 50. 3579 3580 Collective on MPI_Comm 3581 3582 Input Parameters: 3583 + comm - MPI communicator 3584 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3585 This value should be the same as the local size used in creating the 3586 y vector for the matrix-vector product y = Ax. 3587 . n - This value should be the same as the local size used in creating the 3588 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3589 calculated if N is given) For square matrices n is almost always m. 3590 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3591 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3592 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3593 (same value is used for all local rows) 3594 . d_nnz - array containing the number of nonzeros in the various rows of the 3595 DIAGONAL portion of the local submatrix (possibly different for each row) 3596 or NULL, if d_nz is used to specify the nonzero structure. 3597 The size of this array is equal to the number of local rows, i.e 'm'. 3598 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3599 submatrix (same value is used for all local rows). 3600 - o_nnz - array containing the number of nonzeros in the various rows of the 3601 OFF-DIAGONAL portion of the local submatrix (possibly different for 3602 each row) or NULL, if o_nz is used to specify the nonzero 3603 structure. The size of this array is equal to the number 3604 of local rows, i.e 'm'. 3605 3606 Output Parameter: 3607 . A - the matrix 3608 3609 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3610 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3611 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3612 3613 Notes: 3614 If the *_nnz parameter is given then the *_nz parameter is ignored 3615 3616 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3617 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3618 storage requirements for this matrix. 3619 3620 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3621 processor than it must be used on all processors that share the object for 3622 that argument. 3623 3624 The user MUST specify either the local or global matrix dimensions 3625 (possibly both). 3626 3627 The parallel matrix is partitioned across processors such that the 3628 first m0 rows belong to process 0, the next m1 rows belong to 3629 process 1, the next m2 rows belong to process 2 etc.. where 3630 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3631 values corresponding to [m x N] submatrix. 3632 3633 The columns are logically partitioned with the n0 columns belonging 3634 to 0th partition, the next n1 columns belonging to the next 3635 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3636 3637 The DIAGONAL portion of the local submatrix on any given processor 3638 is the submatrix corresponding to the rows and columns m,n 3639 corresponding to the given processor. i.e diagonal matrix on 3640 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3641 etc. The remaining portion of the local submatrix [m x (N-n)] 3642 constitute the OFF-DIAGONAL portion. The example below better 3643 illustrates this concept. 3644 3645 For a square global matrix we define each processor's diagonal portion 3646 to be its local rows and the corresponding columns (a square submatrix); 3647 each processor's off-diagonal portion encompasses the remainder of the 3648 local matrix (a rectangular submatrix). 3649 3650 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3651 3652 When calling this routine with a single process communicator, a matrix of 3653 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3654 type of communicator, use the construction mechanism: 3655 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3656 3657 By default, this format uses inodes (identical nodes) when possible. 3658 We search for consecutive rows with the same nonzero structure, thereby 3659 reusing matrix information to achieve increased efficiency. 3660 3661 Options Database Keys: 3662 + -mat_no_inode - Do not use inodes 3663 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3664 - -mat_aij_oneindex - Internally use indexing starting at 1 3665 rather than 0. Note that when calling MatSetValues(), 3666 the user still MUST index entries starting at 0! 3667 3668 3669 Example usage: 3670 3671 Consider the following 8x8 matrix with 34 non-zero values, that is 3672 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3673 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3674 as follows: 3675 3676 .vb 3677 1 2 0 | 0 3 0 | 0 4 3678 Proc0 0 5 6 | 7 0 0 | 8 0 3679 9 0 10 | 11 0 0 | 12 0 3680 ------------------------------------- 3681 13 0 14 | 15 16 17 | 0 0 3682 Proc1 0 18 0 | 19 20 21 | 0 0 3683 0 0 0 | 22 23 0 | 24 0 3684 ------------------------------------- 3685 Proc2 25 26 27 | 0 0 28 | 29 0 3686 30 0 0 | 31 32 33 | 0 34 3687 .ve 3688 3689 This can be represented as a collection of submatrices as: 3690 3691 .vb 3692 A B C 3693 D E F 3694 G H I 3695 .ve 3696 3697 Where the submatrices A,B,C are owned by proc0, D,E,F are 3698 owned by proc1, G,H,I are owned by proc2. 3699 3700 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3701 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3702 The 'M','N' parameters are 8,8, and have the same values on all procs. 3703 3704 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3705 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3706 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3707 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3708 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3709 matrix, ans [DF] as another SeqAIJ matrix. 3710 3711 When d_nz, o_nz parameters are specified, d_nz storage elements are 3712 allocated for every row of the local diagonal submatrix, and o_nz 3713 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3714 One way to choose d_nz and o_nz is to use the max nonzerors per local 3715 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3716 In this case, the values of d_nz,o_nz are: 3717 .vb 3718 proc0 : dnz = 2, o_nz = 2 3719 proc1 : dnz = 3, o_nz = 2 3720 proc2 : dnz = 1, o_nz = 4 3721 .ve 3722 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3723 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3724 for proc3. i.e we are using 12+15+10=37 storage locations to store 3725 34 values. 3726 3727 When d_nnz, o_nnz parameters are specified, the storage is specified 3728 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3729 In the above case the values for d_nnz,o_nnz are: 3730 .vb 3731 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3732 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3733 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3734 .ve 3735 Here the space allocated is sum of all the above values i.e 34, and 3736 hence pre-allocation is perfect. 3737 3738 Level: intermediate 3739 3740 .keywords: matrix, aij, compressed row, sparse, parallel 3741 3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3743 MPIAIJ, MatCreateMPIAIJWithArrays() 3744 @*/ 3745 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3746 { 3747 PetscErrorCode ierr; 3748 PetscMPIInt size; 3749 3750 PetscFunctionBegin; 3751 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3752 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3754 if (size > 1) { 3755 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3756 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3757 } else { 3758 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3759 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3766 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3767 { 3768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3769 3770 PetscFunctionBegin; 3771 if (Ad) *Ad = a->A; 3772 if (Ao) *Ao = a->B; 3773 if (colmap) *colmap = a->garray; 3774 PetscFunctionReturn(0); 3775 } 3776 3777 #undef __FUNCT__ 3778 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3779 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3780 { 3781 PetscErrorCode ierr; 3782 PetscInt i; 3783 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3784 3785 PetscFunctionBegin; 3786 if (coloring->ctype == IS_COLORING_GLOBAL) { 3787 ISColoringValue *allcolors,*colors; 3788 ISColoring ocoloring; 3789 3790 /* set coloring for diagonal portion */ 3791 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3792 3793 /* set coloring for off-diagonal portion */ 3794 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3795 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3796 for (i=0; i<a->B->cmap->n; i++) { 3797 colors[i] = allcolors[a->garray[i]]; 3798 } 3799 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3800 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3801 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3802 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3803 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3804 ISColoringValue *colors; 3805 PetscInt *larray; 3806 ISColoring ocoloring; 3807 3808 /* set coloring for diagonal portion */ 3809 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3810 for (i=0; i<a->A->cmap->n; i++) { 3811 larray[i] = i + A->cmap->rstart; 3812 } 3813 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3814 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3815 for (i=0; i<a->A->cmap->n; i++) { 3816 colors[i] = coloring->colors[larray[i]]; 3817 } 3818 ierr = PetscFree(larray);CHKERRQ(ierr); 3819 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3820 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3821 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3822 3823 /* set coloring for off-diagonal portion */ 3824 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3825 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3826 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3827 for (i=0; i<a->B->cmap->n; i++) { 3828 colors[i] = coloring->colors[larray[i]]; 3829 } 3830 ierr = PetscFree(larray);CHKERRQ(ierr); 3831 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3832 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3833 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3834 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3840 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3841 { 3842 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3843 PetscErrorCode ierr; 3844 3845 PetscFunctionBegin; 3846 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3847 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3848 PetscFunctionReturn(0); 3849 } 3850 3851 #undef __FUNCT__ 3852 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3853 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3854 { 3855 PetscErrorCode ierr; 3856 PetscInt m,N,i,rstart,nnz,Ii; 3857 PetscInt *indx; 3858 PetscScalar *values; 3859 3860 PetscFunctionBegin; 3861 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3862 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3863 PetscInt *dnz,*onz,sum,bs,cbs; 3864 3865 if (n == PETSC_DECIDE) { 3866 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3867 } 3868 /* Check sum(n) = N */ 3869 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3870 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3871 3872 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3873 rstart -= m; 3874 3875 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3876 for (i=0; i<m; i++) { 3877 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3878 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3879 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3880 } 3881 3882 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3883 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3884 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3885 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3886 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3887 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3888 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3889 } 3890 3891 /* numeric phase */ 3892 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3893 for (i=0; i<m; i++) { 3894 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3895 Ii = i + rstart; 3896 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3897 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3898 } 3899 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3900 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3901 PetscFunctionReturn(0); 3902 } 3903 3904 #undef __FUNCT__ 3905 #define __FUNCT__ "MatFileSplit" 3906 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3907 { 3908 PetscErrorCode ierr; 3909 PetscMPIInt rank; 3910 PetscInt m,N,i,rstart,nnz; 3911 size_t len; 3912 const PetscInt *indx; 3913 PetscViewer out; 3914 char *name; 3915 Mat B; 3916 const PetscScalar *values; 3917 3918 PetscFunctionBegin; 3919 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3920 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3921 /* Should this be the type of the diagonal block of A? */ 3922 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3923 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3924 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3925 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3926 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3927 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3928 for (i=0; i<m; i++) { 3929 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3930 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3931 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3932 } 3933 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935 3936 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3937 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3938 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3939 sprintf(name,"%s.%d",outfile,rank); 3940 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3941 ierr = PetscFree(name);CHKERRQ(ierr); 3942 ierr = MatView(B,out);CHKERRQ(ierr); 3943 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3944 ierr = MatDestroy(&B);CHKERRQ(ierr); 3945 PetscFunctionReturn(0); 3946 } 3947 3948 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3949 #undef __FUNCT__ 3950 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3951 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3952 { 3953 PetscErrorCode ierr; 3954 Mat_Merge_SeqsToMPI *merge; 3955 PetscContainer container; 3956 3957 PetscFunctionBegin; 3958 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3959 if (container) { 3960 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3961 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3962 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3963 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3964 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3965 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3966 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3967 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3968 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3969 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3970 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3971 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3972 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3973 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3974 ierr = PetscFree(merge);CHKERRQ(ierr); 3975 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3976 } 3977 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3978 PetscFunctionReturn(0); 3979 } 3980 3981 #include <../src/mat/utils/freespace.h> 3982 #include <petscbt.h> 3983 3984 #undef __FUNCT__ 3985 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3986 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3987 { 3988 PetscErrorCode ierr; 3989 MPI_Comm comm; 3990 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3991 PetscMPIInt size,rank,taga,*len_s; 3992 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3993 PetscInt proc,m; 3994 PetscInt **buf_ri,**buf_rj; 3995 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3996 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3997 MPI_Request *s_waits,*r_waits; 3998 MPI_Status *status; 3999 MatScalar *aa=a->a; 4000 MatScalar **abuf_r,*ba_i; 4001 Mat_Merge_SeqsToMPI *merge; 4002 PetscContainer container; 4003 4004 PetscFunctionBegin; 4005 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4006 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4007 4008 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4009 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4010 4011 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4012 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4013 4014 bi = merge->bi; 4015 bj = merge->bj; 4016 buf_ri = merge->buf_ri; 4017 buf_rj = merge->buf_rj; 4018 4019 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4020 owners = merge->rowmap->range; 4021 len_s = merge->len_s; 4022 4023 /* send and recv matrix values */ 4024 /*-----------------------------*/ 4025 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4026 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4027 4028 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4029 for (proc=0,k=0; proc<size; proc++) { 4030 if (!len_s[proc]) continue; 4031 i = owners[proc]; 4032 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4033 k++; 4034 } 4035 4036 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4037 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4038 ierr = PetscFree(status);CHKERRQ(ierr); 4039 4040 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4041 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4042 4043 /* insert mat values of mpimat */ 4044 /*----------------------------*/ 4045 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4046 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4047 4048 for (k=0; k<merge->nrecv; k++) { 4049 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4050 nrows = *(buf_ri_k[k]); 4051 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4052 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4053 } 4054 4055 /* set values of ba */ 4056 m = merge->rowmap->n; 4057 for (i=0; i<m; i++) { 4058 arow = owners[rank] + i; 4059 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4060 bnzi = bi[i+1] - bi[i]; 4061 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4062 4063 /* add local non-zero vals of this proc's seqmat into ba */ 4064 anzi = ai[arow+1] - ai[arow]; 4065 aj = a->j + ai[arow]; 4066 aa = a->a + ai[arow]; 4067 nextaj = 0; 4068 for (j=0; nextaj<anzi; j++) { 4069 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4070 ba_i[j] += aa[nextaj++]; 4071 } 4072 } 4073 4074 /* add received vals into ba */ 4075 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4076 /* i-th row */ 4077 if (i == *nextrow[k]) { 4078 anzi = *(nextai[k]+1) - *nextai[k]; 4079 aj = buf_rj[k] + *(nextai[k]); 4080 aa = abuf_r[k] + *(nextai[k]); 4081 nextaj = 0; 4082 for (j=0; nextaj<anzi; j++) { 4083 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4084 ba_i[j] += aa[nextaj++]; 4085 } 4086 } 4087 nextrow[k]++; nextai[k]++; 4088 } 4089 } 4090 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4091 } 4092 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4093 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4094 4095 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4096 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4097 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4098 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4099 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4100 PetscFunctionReturn(0); 4101 } 4102 4103 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4104 4105 #undef __FUNCT__ 4106 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4107 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4108 { 4109 PetscErrorCode ierr; 4110 Mat B_mpi; 4111 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4112 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4113 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4114 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4115 PetscInt len,proc,*dnz,*onz,bs,cbs; 4116 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4117 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4118 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4119 MPI_Status *status; 4120 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4121 PetscBT lnkbt; 4122 Mat_Merge_SeqsToMPI *merge; 4123 PetscContainer container; 4124 4125 PetscFunctionBegin; 4126 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4127 4128 /* make sure it is a PETSc comm */ 4129 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4130 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4131 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4132 4133 ierr = PetscNew(&merge);CHKERRQ(ierr); 4134 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4135 4136 /* determine row ownership */ 4137 /*---------------------------------------------------------*/ 4138 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4139 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4140 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4141 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4142 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4143 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4144 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4145 4146 m = merge->rowmap->n; 4147 owners = merge->rowmap->range; 4148 4149 /* determine the number of messages to send, their lengths */ 4150 /*---------------------------------------------------------*/ 4151 len_s = merge->len_s; 4152 4153 len = 0; /* length of buf_si[] */ 4154 merge->nsend = 0; 4155 for (proc=0; proc<size; proc++) { 4156 len_si[proc] = 0; 4157 if (proc == rank) { 4158 len_s[proc] = 0; 4159 } else { 4160 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4161 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4162 } 4163 if (len_s[proc]) { 4164 merge->nsend++; 4165 nrows = 0; 4166 for (i=owners[proc]; i<owners[proc+1]; i++) { 4167 if (ai[i+1] > ai[i]) nrows++; 4168 } 4169 len_si[proc] = 2*(nrows+1); 4170 len += len_si[proc]; 4171 } 4172 } 4173 4174 /* determine the number and length of messages to receive for ij-structure */ 4175 /*-------------------------------------------------------------------------*/ 4176 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4177 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4178 4179 /* post the Irecv of j-structure */ 4180 /*-------------------------------*/ 4181 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4182 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4183 4184 /* post the Isend of j-structure */ 4185 /*--------------------------------*/ 4186 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4187 4188 for (proc=0, k=0; proc<size; proc++) { 4189 if (!len_s[proc]) continue; 4190 i = owners[proc]; 4191 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4192 k++; 4193 } 4194 4195 /* receives and sends of j-structure are complete */ 4196 /*------------------------------------------------*/ 4197 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4198 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4199 4200 /* send and recv i-structure */ 4201 /*---------------------------*/ 4202 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4203 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4204 4205 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4206 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4207 for (proc=0,k=0; proc<size; proc++) { 4208 if (!len_s[proc]) continue; 4209 /* form outgoing message for i-structure: 4210 buf_si[0]: nrows to be sent 4211 [1:nrows]: row index (global) 4212 [nrows+1:2*nrows+1]: i-structure index 4213 */ 4214 /*-------------------------------------------*/ 4215 nrows = len_si[proc]/2 - 1; 4216 buf_si_i = buf_si + nrows+1; 4217 buf_si[0] = nrows; 4218 buf_si_i[0] = 0; 4219 nrows = 0; 4220 for (i=owners[proc]; i<owners[proc+1]; i++) { 4221 anzi = ai[i+1] - ai[i]; 4222 if (anzi) { 4223 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4224 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4225 nrows++; 4226 } 4227 } 4228 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4229 k++; 4230 buf_si += len_si[proc]; 4231 } 4232 4233 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4234 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4235 4236 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4237 for (i=0; i<merge->nrecv; i++) { 4238 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4239 } 4240 4241 ierr = PetscFree(len_si);CHKERRQ(ierr); 4242 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4243 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4244 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4245 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4246 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4247 ierr = PetscFree(status);CHKERRQ(ierr); 4248 4249 /* compute a local seq matrix in each processor */ 4250 /*----------------------------------------------*/ 4251 /* allocate bi array and free space for accumulating nonzero column info */ 4252 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4253 bi[0] = 0; 4254 4255 /* create and initialize a linked list */ 4256 nlnk = N+1; 4257 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4258 4259 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4260 len = ai[owners[rank+1]] - ai[owners[rank]]; 4261 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4262 4263 current_space = free_space; 4264 4265 /* determine symbolic info for each local row */ 4266 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4267 4268 for (k=0; k<merge->nrecv; k++) { 4269 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4270 nrows = *buf_ri_k[k]; 4271 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4272 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4273 } 4274 4275 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4276 len = 0; 4277 for (i=0; i<m; i++) { 4278 bnzi = 0; 4279 /* add local non-zero cols of this proc's seqmat into lnk */ 4280 arow = owners[rank] + i; 4281 anzi = ai[arow+1] - ai[arow]; 4282 aj = a->j + ai[arow]; 4283 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4284 bnzi += nlnk; 4285 /* add received col data into lnk */ 4286 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4287 if (i == *nextrow[k]) { /* i-th row */ 4288 anzi = *(nextai[k]+1) - *nextai[k]; 4289 aj = buf_rj[k] + *nextai[k]; 4290 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4291 bnzi += nlnk; 4292 nextrow[k]++; nextai[k]++; 4293 } 4294 } 4295 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4296 4297 /* if free space is not available, make more free space */ 4298 if (current_space->local_remaining<bnzi) { 4299 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4300 nspacedouble++; 4301 } 4302 /* copy data into free space, then initialize lnk */ 4303 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4304 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4305 4306 current_space->array += bnzi; 4307 current_space->local_used += bnzi; 4308 current_space->local_remaining -= bnzi; 4309 4310 bi[i+1] = bi[i] + bnzi; 4311 } 4312 4313 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4314 4315 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4316 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4317 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4318 4319 /* create symbolic parallel matrix B_mpi */ 4320 /*---------------------------------------*/ 4321 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4322 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4323 if (n==PETSC_DECIDE) { 4324 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4325 } else { 4326 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4327 } 4328 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4329 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4330 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4331 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4332 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4333 4334 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4335 B_mpi->assembled = PETSC_FALSE; 4336 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4337 merge->bi = bi; 4338 merge->bj = bj; 4339 merge->buf_ri = buf_ri; 4340 merge->buf_rj = buf_rj; 4341 merge->coi = NULL; 4342 merge->coj = NULL; 4343 merge->owners_co = NULL; 4344 4345 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4346 4347 /* attach the supporting struct to B_mpi for reuse */ 4348 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4349 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4350 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4351 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4352 *mpimat = B_mpi; 4353 4354 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4360 /*@C 4361 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4362 matrices from each processor 4363 4364 Collective on MPI_Comm 4365 4366 Input Parameters: 4367 + comm - the communicators the parallel matrix will live on 4368 . seqmat - the input sequential matrices 4369 . m - number of local rows (or PETSC_DECIDE) 4370 . n - number of local columns (or PETSC_DECIDE) 4371 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4372 4373 Output Parameter: 4374 . mpimat - the parallel matrix generated 4375 4376 Level: advanced 4377 4378 Notes: 4379 The dimensions of the sequential matrix in each processor MUST be the same. 4380 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4381 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4382 @*/ 4383 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4384 { 4385 PetscErrorCode ierr; 4386 PetscMPIInt size; 4387 4388 PetscFunctionBegin; 4389 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4390 if (size == 1) { 4391 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4392 if (scall == MAT_INITIAL_MATRIX) { 4393 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4394 } else { 4395 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4396 } 4397 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4398 PetscFunctionReturn(0); 4399 } 4400 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4401 if (scall == MAT_INITIAL_MATRIX) { 4402 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4403 } 4404 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4405 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4406 PetscFunctionReturn(0); 4407 } 4408 4409 #undef __FUNCT__ 4410 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4411 /*@ 4412 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4413 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4414 with MatGetSize() 4415 4416 Not Collective 4417 4418 Input Parameters: 4419 + A - the matrix 4420 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4421 4422 Output Parameter: 4423 . A_loc - the local sequential matrix generated 4424 4425 Level: developer 4426 4427 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4428 4429 @*/ 4430 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4431 { 4432 PetscErrorCode ierr; 4433 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4434 Mat_SeqAIJ *mat,*a,*b; 4435 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4436 MatScalar *aa,*ba,*cam; 4437 PetscScalar *ca; 4438 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4439 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4440 PetscBool match; 4441 MPI_Comm comm; 4442 PetscMPIInt size; 4443 4444 PetscFunctionBegin; 4445 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4446 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4447 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4448 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4449 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4450 4451 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4452 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4453 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4454 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4455 aa = a->a; ba = b->a; 4456 if (scall == MAT_INITIAL_MATRIX) { 4457 if (size == 1) { 4458 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4459 PetscFunctionReturn(0); 4460 } 4461 4462 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4463 ci[0] = 0; 4464 for (i=0; i<am; i++) { 4465 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4466 } 4467 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4468 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4469 k = 0; 4470 for (i=0; i<am; i++) { 4471 ncols_o = bi[i+1] - bi[i]; 4472 ncols_d = ai[i+1] - ai[i]; 4473 /* off-diagonal portion of A */ 4474 for (jo=0; jo<ncols_o; jo++) { 4475 col = cmap[*bj]; 4476 if (col >= cstart) break; 4477 cj[k] = col; bj++; 4478 ca[k++] = *ba++; 4479 } 4480 /* diagonal portion of A */ 4481 for (j=0; j<ncols_d; j++) { 4482 cj[k] = cstart + *aj++; 4483 ca[k++] = *aa++; 4484 } 4485 /* off-diagonal portion of A */ 4486 for (j=jo; j<ncols_o; j++) { 4487 cj[k] = cmap[*bj++]; 4488 ca[k++] = *ba++; 4489 } 4490 } 4491 /* put together the new matrix */ 4492 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4493 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4494 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4495 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4496 mat->free_a = PETSC_TRUE; 4497 mat->free_ij = PETSC_TRUE; 4498 mat->nonew = 0; 4499 } else if (scall == MAT_REUSE_MATRIX) { 4500 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4501 ci = mat->i; cj = mat->j; cam = mat->a; 4502 for (i=0; i<am; i++) { 4503 /* off-diagonal portion of A */ 4504 ncols_o = bi[i+1] - bi[i]; 4505 for (jo=0; jo<ncols_o; jo++) { 4506 col = cmap[*bj]; 4507 if (col >= cstart) break; 4508 *cam++ = *ba++; bj++; 4509 } 4510 /* diagonal portion of A */ 4511 ncols_d = ai[i+1] - ai[i]; 4512 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4513 /* off-diagonal portion of A */ 4514 for (j=jo; j<ncols_o; j++) { 4515 *cam++ = *ba++; bj++; 4516 } 4517 } 4518 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4519 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4520 PetscFunctionReturn(0); 4521 } 4522 4523 #undef __FUNCT__ 4524 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4525 /*@C 4526 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4527 4528 Not Collective 4529 4530 Input Parameters: 4531 + A - the matrix 4532 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4533 - row, col - index sets of rows and columns to extract (or NULL) 4534 4535 Output Parameter: 4536 . A_loc - the local sequential matrix generated 4537 4538 Level: developer 4539 4540 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4541 4542 @*/ 4543 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4544 { 4545 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4546 PetscErrorCode ierr; 4547 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4548 IS isrowa,iscola; 4549 Mat *aloc; 4550 PetscBool match; 4551 4552 PetscFunctionBegin; 4553 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4554 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4555 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4556 if (!row) { 4557 start = A->rmap->rstart; end = A->rmap->rend; 4558 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4559 } else { 4560 isrowa = *row; 4561 } 4562 if (!col) { 4563 start = A->cmap->rstart; 4564 cmap = a->garray; 4565 nzA = a->A->cmap->n; 4566 nzB = a->B->cmap->n; 4567 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4568 ncols = 0; 4569 for (i=0; i<nzB; i++) { 4570 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4571 else break; 4572 } 4573 imark = i; 4574 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4575 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4576 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4577 } else { 4578 iscola = *col; 4579 } 4580 if (scall != MAT_INITIAL_MATRIX) { 4581 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4582 aloc[0] = *A_loc; 4583 } 4584 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4585 *A_loc = aloc[0]; 4586 ierr = PetscFree(aloc);CHKERRQ(ierr); 4587 if (!row) { 4588 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4589 } 4590 if (!col) { 4591 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4592 } 4593 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4594 PetscFunctionReturn(0); 4595 } 4596 4597 #undef __FUNCT__ 4598 #define __FUNCT__ "MatGetBrowsOfAcols" 4599 /*@C 4600 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4601 4602 Collective on Mat 4603 4604 Input Parameters: 4605 + A,B - the matrices in mpiaij format 4606 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4607 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4608 4609 Output Parameter: 4610 + rowb, colb - index sets of rows and columns of B to extract 4611 - B_seq - the sequential matrix generated 4612 4613 Level: developer 4614 4615 @*/ 4616 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4617 { 4618 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4619 PetscErrorCode ierr; 4620 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4621 IS isrowb,iscolb; 4622 Mat *bseq=NULL; 4623 4624 PetscFunctionBegin; 4625 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4626 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4627 } 4628 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4629 4630 if (scall == MAT_INITIAL_MATRIX) { 4631 start = A->cmap->rstart; 4632 cmap = a->garray; 4633 nzA = a->A->cmap->n; 4634 nzB = a->B->cmap->n; 4635 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4636 ncols = 0; 4637 for (i=0; i<nzB; i++) { /* row < local row index */ 4638 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4639 else break; 4640 } 4641 imark = i; 4642 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4643 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4644 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4645 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4646 } else { 4647 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4648 isrowb = *rowb; iscolb = *colb; 4649 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4650 bseq[0] = *B_seq; 4651 } 4652 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4653 *B_seq = bseq[0]; 4654 ierr = PetscFree(bseq);CHKERRQ(ierr); 4655 if (!rowb) { 4656 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4657 } else { 4658 *rowb = isrowb; 4659 } 4660 if (!colb) { 4661 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4662 } else { 4663 *colb = iscolb; 4664 } 4665 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4666 PetscFunctionReturn(0); 4667 } 4668 4669 #undef __FUNCT__ 4670 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4671 /* 4672 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4673 of the OFF-DIAGONAL portion of local A 4674 4675 Collective on Mat 4676 4677 Input Parameters: 4678 + A,B - the matrices in mpiaij format 4679 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4680 4681 Output Parameter: 4682 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4683 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4684 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4685 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4686 4687 Level: developer 4688 4689 */ 4690 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4691 { 4692 VecScatter_MPI_General *gen_to,*gen_from; 4693 PetscErrorCode ierr; 4694 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4695 Mat_SeqAIJ *b_oth; 4696 VecScatter ctx =a->Mvctx; 4697 MPI_Comm comm; 4698 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4699 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4700 PetscScalar *rvalues,*svalues; 4701 MatScalar *b_otha,*bufa,*bufA; 4702 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4703 MPI_Request *rwaits = NULL,*swaits = NULL; 4704 MPI_Status *sstatus,rstatus; 4705 PetscMPIInt jj,size; 4706 PetscInt *cols,sbs,rbs; 4707 PetscScalar *vals; 4708 4709 PetscFunctionBegin; 4710 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4711 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4712 4713 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4714 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4715 } 4716 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4717 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4718 4719 gen_to = (VecScatter_MPI_General*)ctx->todata; 4720 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4721 rvalues = gen_from->values; /* holds the length of receiving row */ 4722 svalues = gen_to->values; /* holds the length of sending row */ 4723 nrecvs = gen_from->n; 4724 nsends = gen_to->n; 4725 4726 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4727 srow = gen_to->indices; /* local row index to be sent */ 4728 sstarts = gen_to->starts; 4729 sprocs = gen_to->procs; 4730 sstatus = gen_to->sstatus; 4731 sbs = gen_to->bs; 4732 rstarts = gen_from->starts; 4733 rprocs = gen_from->procs; 4734 rbs = gen_from->bs; 4735 4736 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4737 if (scall == MAT_INITIAL_MATRIX) { 4738 /* i-array */ 4739 /*---------*/ 4740 /* post receives */ 4741 for (i=0; i<nrecvs; i++) { 4742 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4743 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4744 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4745 } 4746 4747 /* pack the outgoing message */ 4748 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4749 4750 sstartsj[0] = 0; 4751 rstartsj[0] = 0; 4752 len = 0; /* total length of j or a array to be sent */ 4753 k = 0; 4754 for (i=0; i<nsends; i++) { 4755 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4756 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4757 for (j=0; j<nrows; j++) { 4758 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4759 for (l=0; l<sbs; l++) { 4760 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4761 4762 rowlen[j*sbs+l] = ncols; 4763 4764 len += ncols; 4765 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4766 } 4767 k++; 4768 } 4769 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4770 4771 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4772 } 4773 /* recvs and sends of i-array are completed */ 4774 i = nrecvs; 4775 while (i--) { 4776 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4777 } 4778 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4779 4780 /* allocate buffers for sending j and a arrays */ 4781 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4782 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4783 4784 /* create i-array of B_oth */ 4785 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4786 4787 b_othi[0] = 0; 4788 len = 0; /* total length of j or a array to be received */ 4789 k = 0; 4790 for (i=0; i<nrecvs; i++) { 4791 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4792 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4793 for (j=0; j<nrows; j++) { 4794 b_othi[k+1] = b_othi[k] + rowlen[j]; 4795 ierr = PetscIntSumError(rowlen[j],len,&len); 4796 k++; 4797 } 4798 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4799 } 4800 4801 /* allocate space for j and a arrrays of B_oth */ 4802 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4803 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4804 4805 /* j-array */ 4806 /*---------*/ 4807 /* post receives of j-array */ 4808 for (i=0; i<nrecvs; i++) { 4809 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4810 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4811 } 4812 4813 /* pack the outgoing message j-array */ 4814 k = 0; 4815 for (i=0; i<nsends; i++) { 4816 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4817 bufJ = bufj+sstartsj[i]; 4818 for (j=0; j<nrows; j++) { 4819 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4820 for (ll=0; ll<sbs; ll++) { 4821 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4822 for (l=0; l<ncols; l++) { 4823 *bufJ++ = cols[l]; 4824 } 4825 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4826 } 4827 } 4828 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4829 } 4830 4831 /* recvs and sends of j-array are completed */ 4832 i = nrecvs; 4833 while (i--) { 4834 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4835 } 4836 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4837 } else if (scall == MAT_REUSE_MATRIX) { 4838 sstartsj = *startsj_s; 4839 rstartsj = *startsj_r; 4840 bufa = *bufa_ptr; 4841 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4842 b_otha = b_oth->a; 4843 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4844 4845 /* a-array */ 4846 /*---------*/ 4847 /* post receives of a-array */ 4848 for (i=0; i<nrecvs; i++) { 4849 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4850 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4851 } 4852 4853 /* pack the outgoing message a-array */ 4854 k = 0; 4855 for (i=0; i<nsends; i++) { 4856 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4857 bufA = bufa+sstartsj[i]; 4858 for (j=0; j<nrows; j++) { 4859 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4860 for (ll=0; ll<sbs; ll++) { 4861 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4862 for (l=0; l<ncols; l++) { 4863 *bufA++ = vals[l]; 4864 } 4865 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4866 } 4867 } 4868 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4869 } 4870 /* recvs and sends of a-array are completed */ 4871 i = nrecvs; 4872 while (i--) { 4873 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4874 } 4875 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4876 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4877 4878 if (scall == MAT_INITIAL_MATRIX) { 4879 /* put together the new matrix */ 4880 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4881 4882 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4883 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4884 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4885 b_oth->free_a = PETSC_TRUE; 4886 b_oth->free_ij = PETSC_TRUE; 4887 b_oth->nonew = 0; 4888 4889 ierr = PetscFree(bufj);CHKERRQ(ierr); 4890 if (!startsj_s || !bufa_ptr) { 4891 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4892 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4893 } else { 4894 *startsj_s = sstartsj; 4895 *startsj_r = rstartsj; 4896 *bufa_ptr = bufa; 4897 } 4898 } 4899 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatGetCommunicationStructs" 4905 /*@C 4906 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4907 4908 Not Collective 4909 4910 Input Parameters: 4911 . A - The matrix in mpiaij format 4912 4913 Output Parameter: 4914 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4915 . colmap - A map from global column index to local index into lvec 4916 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4917 4918 Level: developer 4919 4920 @*/ 4921 #if defined(PETSC_USE_CTABLE) 4922 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4923 #else 4924 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4925 #endif 4926 { 4927 Mat_MPIAIJ *a; 4928 4929 PetscFunctionBegin; 4930 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4931 PetscValidPointer(lvec, 2); 4932 PetscValidPointer(colmap, 3); 4933 PetscValidPointer(multScatter, 4); 4934 a = (Mat_MPIAIJ*) A->data; 4935 if (lvec) *lvec = a->lvec; 4936 if (colmap) *colmap = a->colmap; 4937 if (multScatter) *multScatter = a->Mvctx; 4938 PetscFunctionReturn(0); 4939 } 4940 4941 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4942 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4943 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4944 #if defined(PETSC_HAVE_ELEMENTAL) 4945 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4946 #endif 4947 4948 #undef __FUNCT__ 4949 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4950 /* 4951 Computes (B'*A')' since computing B*A directly is untenable 4952 4953 n p p 4954 ( ) ( ) ( ) 4955 m ( A ) * n ( B ) = m ( C ) 4956 ( ) ( ) ( ) 4957 4958 */ 4959 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4960 { 4961 PetscErrorCode ierr; 4962 Mat At,Bt,Ct; 4963 4964 PetscFunctionBegin; 4965 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4966 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4967 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4968 ierr = MatDestroy(&At);CHKERRQ(ierr); 4969 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4970 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4971 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4972 PetscFunctionReturn(0); 4973 } 4974 4975 #undef __FUNCT__ 4976 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4977 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4978 { 4979 PetscErrorCode ierr; 4980 PetscInt m=A->rmap->n,n=B->cmap->n; 4981 Mat Cmat; 4982 4983 PetscFunctionBegin; 4984 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4985 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4986 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4987 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4988 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4989 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4990 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4991 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4992 4993 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4994 4995 *C = Cmat; 4996 PetscFunctionReturn(0); 4997 } 4998 4999 /* ----------------------------------------------------------------*/ 5000 #undef __FUNCT__ 5001 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5002 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5003 { 5004 PetscErrorCode ierr; 5005 5006 PetscFunctionBegin; 5007 if (scall == MAT_INITIAL_MATRIX) { 5008 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5009 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5010 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5011 } 5012 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5013 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5014 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5015 PetscFunctionReturn(0); 5016 } 5017 5018 /*MC 5019 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5020 5021 Options Database Keys: 5022 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5023 5024 Level: beginner 5025 5026 .seealso: MatCreateAIJ() 5027 M*/ 5028 5029 #undef __FUNCT__ 5030 #define __FUNCT__ "MatCreate_MPIAIJ" 5031 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5032 { 5033 Mat_MPIAIJ *b; 5034 PetscErrorCode ierr; 5035 PetscMPIInt size; 5036 5037 PetscFunctionBegin; 5038 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5039 5040 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5041 B->data = (void*)b; 5042 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5043 B->assembled = PETSC_FALSE; 5044 B->insertmode = NOT_SET_VALUES; 5045 b->size = size; 5046 5047 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5048 5049 /* build cache for off array entries formed */ 5050 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5051 5052 b->donotstash = PETSC_FALSE; 5053 b->colmap = 0; 5054 b->garray = 0; 5055 b->roworiented = PETSC_TRUE; 5056 5057 /* stuff used for matrix vector multiply */ 5058 b->lvec = NULL; 5059 b->Mvctx = NULL; 5060 5061 /* stuff for MatGetRow() */ 5062 b->rowindices = 0; 5063 b->rowvalues = 0; 5064 b->getrowactive = PETSC_FALSE; 5065 5066 /* flexible pointer used in CUSP/CUSPARSE classes */ 5067 b->spptr = NULL; 5068 5069 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5079 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5080 #if defined(PETSC_HAVE_ELEMENTAL) 5081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5082 #endif 5083 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5087 PetscFunctionReturn(0); 5088 } 5089 5090 #undef __FUNCT__ 5091 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5092 /*@C 5093 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5094 and "off-diagonal" part of the matrix in CSR format. 5095 5096 Collective on MPI_Comm 5097 5098 Input Parameters: 5099 + comm - MPI communicator 5100 . m - number of local rows (Cannot be PETSC_DECIDE) 5101 . n - This value should be the same as the local size used in creating the 5102 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5103 calculated if N is given) For square matrices n is almost always m. 5104 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5105 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5106 . i - row indices for "diagonal" portion of matrix 5107 . j - column indices 5108 . a - matrix values 5109 . oi - row indices for "off-diagonal" portion of matrix 5110 . oj - column indices 5111 - oa - matrix values 5112 5113 Output Parameter: 5114 . mat - the matrix 5115 5116 Level: advanced 5117 5118 Notes: 5119 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5120 must free the arrays once the matrix has been destroyed and not before. 5121 5122 The i and j indices are 0 based 5123 5124 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5125 5126 This sets local rows and cannot be used to set off-processor values. 5127 5128 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5129 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5130 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5131 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5132 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5133 communication if it is known that only local entries will be set. 5134 5135 .keywords: matrix, aij, compressed row, sparse, parallel 5136 5137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5138 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5139 @*/ 5140 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5141 { 5142 PetscErrorCode ierr; 5143 Mat_MPIAIJ *maij; 5144 5145 PetscFunctionBegin; 5146 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5147 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5148 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5149 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5150 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5151 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5152 maij = (Mat_MPIAIJ*) (*mat)->data; 5153 5154 (*mat)->preallocated = PETSC_TRUE; 5155 5156 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5157 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5158 5159 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5160 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5161 5162 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5163 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5164 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5165 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5166 5167 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5168 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5169 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5170 PetscFunctionReturn(0); 5171 } 5172 5173 /* 5174 Special version for direct calls from Fortran 5175 */ 5176 #include <petsc/private/fortranimpl.h> 5177 5178 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5179 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5181 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5182 #endif 5183 5184 /* Change these macros so can be used in void function */ 5185 #undef CHKERRQ 5186 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5187 #undef SETERRQ2 5188 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5189 #undef SETERRQ3 5190 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5191 #undef SETERRQ 5192 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5193 5194 #undef __FUNCT__ 5195 #define __FUNCT__ "matsetvaluesmpiaij_" 5196 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5197 { 5198 Mat mat = *mmat; 5199 PetscInt m = *mm, n = *mn; 5200 InsertMode addv = *maddv; 5201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5202 PetscScalar value; 5203 PetscErrorCode ierr; 5204 5205 MatCheckPreallocated(mat,1); 5206 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5207 5208 #if defined(PETSC_USE_DEBUG) 5209 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5210 #endif 5211 { 5212 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5213 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5214 PetscBool roworiented = aij->roworiented; 5215 5216 /* Some Variables required in the macro */ 5217 Mat A = aij->A; 5218 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5219 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5220 MatScalar *aa = a->a; 5221 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5222 Mat B = aij->B; 5223 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5224 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5225 MatScalar *ba = b->a; 5226 5227 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5228 PetscInt nonew = a->nonew; 5229 MatScalar *ap1,*ap2; 5230 5231 PetscFunctionBegin; 5232 for (i=0; i<m; i++) { 5233 if (im[i] < 0) continue; 5234 #if defined(PETSC_USE_DEBUG) 5235 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5236 #endif 5237 if (im[i] >= rstart && im[i] < rend) { 5238 row = im[i] - rstart; 5239 lastcol1 = -1; 5240 rp1 = aj + ai[row]; 5241 ap1 = aa + ai[row]; 5242 rmax1 = aimax[row]; 5243 nrow1 = ailen[row]; 5244 low1 = 0; 5245 high1 = nrow1; 5246 lastcol2 = -1; 5247 rp2 = bj + bi[row]; 5248 ap2 = ba + bi[row]; 5249 rmax2 = bimax[row]; 5250 nrow2 = bilen[row]; 5251 low2 = 0; 5252 high2 = nrow2; 5253 5254 for (j=0; j<n; j++) { 5255 if (roworiented) value = v[i*n+j]; 5256 else value = v[i+j*m]; 5257 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5258 if (in[j] >= cstart && in[j] < cend) { 5259 col = in[j] - cstart; 5260 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5261 } else if (in[j] < 0) continue; 5262 #if defined(PETSC_USE_DEBUG) 5263 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5264 #endif 5265 else { 5266 if (mat->was_assembled) { 5267 if (!aij->colmap) { 5268 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5269 } 5270 #if defined(PETSC_USE_CTABLE) 5271 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5272 col--; 5273 #else 5274 col = aij->colmap[in[j]] - 1; 5275 #endif 5276 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5277 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5278 col = in[j]; 5279 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5280 B = aij->B; 5281 b = (Mat_SeqAIJ*)B->data; 5282 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5283 rp2 = bj + bi[row]; 5284 ap2 = ba + bi[row]; 5285 rmax2 = bimax[row]; 5286 nrow2 = bilen[row]; 5287 low2 = 0; 5288 high2 = nrow2; 5289 bm = aij->B->rmap->n; 5290 ba = b->a; 5291 } 5292 } else col = in[j]; 5293 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5294 } 5295 } 5296 } else if (!aij->donotstash) { 5297 if (roworiented) { 5298 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5299 } else { 5300 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5301 } 5302 } 5303 } 5304 } 5305 PetscFunctionReturnVoid(); 5306 } 5307 5308