#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the AIJ type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij =
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
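
   A minimal calling sketch (illustrative only: gmat, mlocal, and dmat are hypothetical names,
   and each rank passes its own local row count mlocal; as noted below, only square matrices
   are supported):

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   Here gmat is the global SeqAIJ matrix whose entries live on process 0; with MAT_REUSE_MATRIX
   only the numerical values are shipped again and the parallel nonzero structure is kept.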
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each process has an order-N integer array), but access is fast.
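
  As a concrete sketch (mirroring the lookups done in MatSetValues_MPIAIJ() and
  MatGetValues_MPIAIJ() below), converting a global column index gcol into a local
  column index lcol of the off-diagonal block B looks like:

      PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          [the +1/-1 shift lets 0 mean "column not present locally"]
   #else
      lcol = aij->colmap[gcol] - 1;    [colmap is an array of length mat->cmap->N+1]
   #endif

  gcol and lcol are illustrative names only; the real code uses in[j]/idxn[j] and col.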
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or 
ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
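     (In the reduction below, MPI_PROD over the PetscBool values of was_assembled yields
     PETSC_TRUE only if the flag is still PETSC_TRUE on every rank; if any rank reports
     PETSC_FALSE the product is PETSC_FALSE and each rank that is still assembled calls
     MatDisAssemble_MPIAIJ() so that all ranks can reassemble consistently.)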
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscMPIInt size = mat->size; 748 PetscSF sf; 749 PetscInt *lrows; 750 PetscSFNode *rrows; 751 PetscInt lastidx = -1, r, p = 0, len = 0; 752 PetscErrorCode ierr; 753 754 PetscFunctionBegin; 755 /* Create SF where leaves are input rows and roots are owned rows */ 756 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 757 for (r = 0; r < n; ++r) lrows[r] = -1; 758 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 759 for (r = 0; r < N; ++r) { 760 const PetscInt idx = rows[r]; 761 PetscBool found = PETSC_FALSE; 762 /* Trick for efficient searching for sorted rows */ 763 if (lastidx > idx) p = 0; 764 lastidx = idx; 765 for (; p < size; ++p) { 766 if (idx >= owners[p] && idx < owners[p+1]) { 767 rrows[r].rank = p; 768 rrows[r].index = rows[r] - owners[p]; 769 found = PETSC_TRUE; 770 break; 771 } 772 } 773 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 774 } 775 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 776 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 777 /* Collect flags for rows to be zeroed */ 778 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 779 ierr = PetscSFReduceEnd(sf, 
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 780 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 781 /* Compress and put in row numbers */ 782 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 783 /* fix right hand side if needed */ 784 if (x && b) { 785 const PetscScalar *xx; 786 PetscScalar *bb; 787 788 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 789 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 790 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 791 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 793 } 794 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 795 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 796 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 823 { 824 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 825 PetscErrorCode ierr; 826 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 827 PetscInt i,j,r,m,p = 0,len = 0; 828 PetscInt *lrows,*owners = A->rmap->range; 829 PetscSFNode *rrows; 830 PetscSF sf; 831 const PetscScalar *xx; 832 PetscScalar *bb,*mask; 833 Vec xmask,lmask; 834 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 835 const PetscInt *aj, *ii,*ridx; 836 PetscScalar *aa; 837 #if defined(PETSC_DEBUG) 838 PetscBool found = PETSC_FALSE; 839 #endif 840 841 PetscFunctionBegin; 842 /* Create SF where leaves are input rows and roots are owned rows */ 843 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 844 for (r = 0; r < n; ++r) lrows[r] = -1; 845 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 846 for (r = 0; r < N; ++r) { 847 const PetscInt idx = rows[r]; 848 PetscBool found = PETSC_FALSE; 849 /* Trick for efficient searching for sorted rows */ 850 if (lastidx > idx) p = 0; 851 lastidx = idx; 852 for (; p < size; ++p) { 853 if (idx >= owners[p] && idx < owners[p+1]) { 854 rrows[r].rank = p; 855 rrows[r].index = rows[r] - owners[p]; 856 found = PETSC_TRUE; 857 break; 858 } 859 } 860 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 861 } 862 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 863 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 864 /* Collect flags for rows to be zeroed */ 865 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 866 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 867 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 868 /* Compress and put in row numbers */ 869 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870 /* zero diagonal part of matrix */ 871 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 872 /* handle off diagonal part of matrix */ 873 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 874 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 875 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 876 for (i=0; i<len; i++) bb[lrows[i]] = 1; 877 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 878 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 881 if (x) { 882 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 885 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 886 } 887 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 888 /* remove zeroed rows of off diagonal matrix */ 889 ii = aij->i; 890 for (i=0; i<len; i++) { 891 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 /* loop over all elements of off process part of matrix zeroing removed columns*/ 894 if (aij->compressedrow.use) { 895 m = aij->compressedrow.nrows; 896 ii = aij->compressedrow.i; 897 ridx = aij->compressedrow.rindex; 898 for (i=0; i<m; i++) { 899 n = ii[i+1] - ii[i]; 900 aj = aij->j + ii[i]; 901 aa = aij->a + ii[i]; 902 903 for (j=0; j<n; j++) { 904 if (PetscAbsScalar(mask[*aj])) { 905 if (b) bb[*ridx] -= *aa*xx[*aj]; 906 *aa = 0.0; 907 } 908 aa++; 909 aj++; 910 } 911 ridx++; 912 } 913 } else { /* do not use compressed row format */ 914 m = l->B->rmap->n; 915 for (i=0; i<m; i++) { 916 n = ii[i+1] - ii[i]; 917 aj = aij->j + ii[i]; 918 aa = aij->a + ii[i]; 919 for (j=0; j<n; j++) { 920 if (PetscAbsScalar(mask[*aj])) { 921 if (b) bb[i] -= *aa*xx[*aj]; 922 *aa = 0.0; 923 } 924 aa++; 925 aj++; 926 } 927 } 928 } 929 if (x) { 930 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 932 } 933 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 934 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 935 ierr = PetscFree(lrows);CHKERRQ(ierr); 936 937 /* only change matrix nonzero state if pattern was allowed to be changed */ 938 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 939 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 940 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatMult_MPIAIJ" 947 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 948 { 949 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 950 PetscErrorCode ierr; 951 PetscInt nt; 952 953 PetscFunctionBegin; 954 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 955 if (nt != A->cmap->n) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 956 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 957 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 958 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 965 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 966 { 967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMultAdd_MPIAIJ" 977 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 982 PetscFunctionBegin; 983 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 985 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 992 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 993 { 994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 995 PetscErrorCode ierr; 996 PetscBool merged; 997 998 PetscFunctionBegin; 999 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1000 /* do nondiagonal part */ 1001 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1002 if (!merged) { 1003 /* send it on its way */ 1004 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* receive remote parts: note this assumes the values are not actually */ 1008 /* added in yy until the next line, */ 1009 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1010 } else { 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* send it on its way */ 1014 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1015 /* values actually were received in the Begin() but we need to call this nop */ 1016 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 } 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1023 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1024 { 1025 MPI_Comm comm; 1026 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1027 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1028 IS Me,Notme; 1029 PetscErrorCode ierr; 1030 PetscInt M,N,first,last,*notme,i; 1031 PetscMPIInt size; 1032 1033 PetscFunctionBegin; 1034 /* Easy test: symmetric diagonal block */ 1035 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1036 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1037 if (!*f) PetscFunctionReturn(0); 1038 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1039 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1040 if (size == 1) PetscFunctionReturn(0); 1041 1042 /* Hard test: off-diagonal block. 
This takes a MatGetSubMatrix. */ 1043 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1044 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1045 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1046 for (i=0; i<first; i++) notme[i] = i; 1047 for (i=last; i<M; i++) notme[i-last+first] = i; 1048 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1049 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1050 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1051 Aoff = Aoffs[0]; 1052 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1053 Boff = Boffs[0]; 1054 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1055 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1056 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1057 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1058 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1059 ierr = PetscFree(notme);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1065 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1066 { 1067 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 1070 PetscFunctionBegin; 1071 /* do nondiagonal part */ 1072 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1073 /* send it on its way */ 1074 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1075 /* do local part */ 1076 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1077 /* receive remote parts */ 1078 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 /* 1083 This only works correctly for square matrices where the subblock A->A is the 1084 diagonal block 1085 */ 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1088 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1089 { 1090 PetscErrorCode ierr; 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 1093 PetscFunctionBegin; 1094 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1095 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1096 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatScale_MPIAIJ" 1102 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 1107 PetscFunctionBegin; 1108 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1109 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatDestroy_MatRedundant" 1115 PetscErrorCode MatDestroy_MatRedundant(Mat A) 1116 { 1117 PetscErrorCode ierr; 1118 Mat_Redundant *redund; 1119 PetscInt i; 1120 PetscMPIInt size; 1121 1122 PetscFunctionBegin; 1123 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1124 if (size == 1) { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1126 redund = a->redundant; 1127 } else { 1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1129 redund = a->redundant; 1130 } 1131 if (redund){ 1132 if (redund->matseq) { /* via MatGetSubMatrices() */ 1133 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1134 ierr = 
ISDestroy(&redund->iscol);CHKERRQ(ierr); 1135 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1136 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1137 } else { 1138 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1139 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1140 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1141 for (i=0; i<redund->nrecvs; i++) { 1142 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1143 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1144 } 1145 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1146 } 1147 1148 if (redund->psubcomm) { 1149 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1150 } 1151 ierr = PetscFree(redund);CHKERRQ(ierr); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatDestroy_MPIAIJ" 1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1159 { 1160 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 #if defined(PETSC_USE_LOG) 1165 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1166 #endif 1167 ierr = MatDestroy_MatRedundant(mat);CHKERRQ(ierr); 1168 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1169 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1170 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1171 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1172 #if defined(PETSC_USE_CTABLE) 1173 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1174 #else 1175 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1176 #endif 1177 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1178 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1179 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1180 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1181 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1182 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1183 1184 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1186 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1189 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1190 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1191 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1192 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1198 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1199 { 1200 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1201 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1202 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1203 PetscErrorCode ierr; 1204 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1205 int fd; 1206 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1207 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1208 PetscScalar *column_values; 1209 PetscInt message_count,flowcontrolcount; 1210 FILE *file; 1211 1212 PetscFunctionBegin; 1213 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs a buffer as large as the largest number of local nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1280 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1281 } 1282 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1283 } else { 1284 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1285 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1286 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1287 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1288 } 1289 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1290 1291 /* load up the local column values */ 1292 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1293 cnt = 0; 1294 for (i=0; i<mat->rmap->n; i++) { 1295 for (j=B->i[i]; j<B->i[i+1]; j++) { 1296 if (garray[B->j[j]] > cstart) break; 1297 column_values[cnt++] = B->a[j]; 1298 } 1299 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1300 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1301 } 1302 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1303 1304 /* store the column values to the file */ 1305 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1306 if (!rank) { 1307 MPI_Status status; 1308 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1309 for (i=1; i<size; i++) { 1310 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1311 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1312 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1313 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1315 } 1316 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1317 } else { 1318 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1319 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1320 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1321 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1322 } 1323 ierr = PetscFree(column_values);CHKERRQ(ierr); 1324 1325 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1326 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 #include <petscdraw.h> 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1333 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1334 { 1335 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1336 PetscErrorCode ierr; 1337 PetscMPIInt rank = aij->rank,size = aij->size; 1338 PetscBool isdraw,iascii,isbinary; 1339 PetscViewer sviewer; 1340 PetscViewerFormat format; 1341 1342 PetscFunctionBegin; 1343 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1344 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1345 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1346 if (iascii) { 1347 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1348 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1349 MatInfo info; 1350 PetscBool inodes; 1351 1352 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1353 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1354 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1355 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1356 if (!inodes) { 1357 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1358 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1361 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1362 } 1363 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1364 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1365 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1366 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1367 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1368 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1369 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1370 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1373 PetscInt inodecount,inodelimit,*inodes; 1374 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1375 if (inodes) { 1376 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1377 } else { 1378 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1382 PetscFunctionReturn(0); 1383 } 1384 } else if (isbinary) { 1385 if (size == 1) { 1386 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1387 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1388 } else { 1389 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1390 } 1391 PetscFunctionReturn(0); 1392 } else if (isdraw) { 1393 PetscDraw draw; 1394 PetscBool isnull; 1395 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1396 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1397 } 1398 1399 { 1400 /* assemble the entire matrix onto first processor. 
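        This gather-to-one-process path is reached when the full matrix (rather than just
        summary information) is viewed, for example with the standard PETSc calls

           ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   [full ASCII dump]
           ierr = MatView(mat,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);     [nonzero-structure plot]

        Since every entry is copied to process 0, this is intended for small matrices and
        debugging, not for production-size runs.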
*/ 1401 Mat A; 1402 Mat_SeqAIJ *Aloc; 1403 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1404 MatScalar *a; 1405 1406 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1407 if (!rank) { 1408 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1409 } else { 1410 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1411 } 1412 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1413 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1414 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1415 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1416 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1417 1418 /* copy over the A part */ 1419 Aloc = (Mat_SeqAIJ*)aij->A->data; 1420 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1421 row = mat->rmap->rstart; 1422 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1423 for (i=0; i<m; i++) { 1424 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1425 row++; 1426 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1427 } 1428 aj = Aloc->j; 1429 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1430 1431 /* copy over the B part */ 1432 Aloc = (Mat_SeqAIJ*)aij->B->data; 1433 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1434 row = mat->rmap->rstart; 1435 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1436 ct = cols; 1437 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1438 for (i=0; i<m; i++) { 1439 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1440 row++; 1441 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1442 } 1443 ierr = PetscFree(ct);CHKERRQ(ierr); 1444 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 /* 1447 Everyone has to call to draw the matrix since the graphics waits are 1448 synchronized across all processors that share the PetscDraw object 1449 */ 1450 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1451 if (!rank) { 1452 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1453 } 1454 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1455 ierr = MatDestroy(&A);CHKERRQ(ierr); 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatView_MPIAIJ" 1462 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1463 { 1464 PetscErrorCode ierr; 1465 PetscBool iascii,isdraw,issocket,isbinary; 1466 1467 PetscFunctionBegin; 1468 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1469 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1470 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1471 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1472 if (iascii || isdraw || isbinary || issocket) { 1473 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1474 } 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatSOR_MPIAIJ" 1480 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1481 { 1482 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1483 PetscErrorCode ierr; 1484 Vec bb1 = 0; 1485 PetscBool hasop; 1486 1487 PetscFunctionBegin; 1488 if (flag == SOR_APPLY_UPPER) { 1489 ierr 
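/*
   Parallel SOR here is block Jacobi between processes with SOR inside each
   local diagonal block: every outer iteration scatters the current solution
   into mat->lvec, forms bb1 = bb - B*lvec (B carries the off-process
   coupling), and then runs the requested local sweep on the diagonal block A.
*/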
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1494 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1495 } 1496 1497 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1498 if (flag & SOR_ZERO_INITIAL_GUESS) { 1499 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1500 its--; 1501 } 1502 1503 while (its--) { 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 1507 /* update rhs: bb1 = bb - B*x */ 1508 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1509 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1510 1511 /* local sweep */ 1512 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1513 } 1514 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1515 if (flag & SOR_ZERO_INITIAL_GUESS) { 1516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1517 its--; 1518 } 1519 while (its--) { 1520 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 1523 /* update rhs: bb1 = bb - B*x */ 1524 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1525 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1526 1527 /* local sweep */ 1528 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1529 } 1530 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1531 if (flag & SOR_ZERO_INITIAL_GUESS) { 1532 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1533 its--; 1534 } 1535 while (its--) { 1536 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 1539 /* update rhs: bb1 = bb - B*x */ 1540 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1541 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1542 1543 /* local sweep */ 1544 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1545 } 1546 } else if (flag & SOR_EISENSTAT) { 1547 Vec xx1; 1548 1549 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1550 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1551 1552 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 if (!mat->diag) { 1555 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1556 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1557 } 1558 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1559 if (hasop) { 1560 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1561 } else { 1562 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1563 } 1564 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1565 1566 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1567 1568 /* local sweep */ 1569 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
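/*
   Eisenstat variant: the local backward sweep above produced xx; the
   right-hand side is then modified using the diagonal and the (omega-2)/omega
   factor, and the local forward sweep below produces xx1, which is added to
   xx afterwards.
*/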
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1570 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1571 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1572 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1573 1574 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatPermute_MPIAIJ" 1580 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1581 { 1582 Mat aA,aB,Aperm; 1583 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1584 PetscScalar *aa,*ba; 1585 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1586 PetscSF rowsf,sf; 1587 IS parcolp = NULL; 1588 PetscBool done; 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1593 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1594 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1595 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1596 1597 /* Invert row permutation to find out where my rows should go */ 1598 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1599 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1600 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1601 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1602 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1603 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1604 1605 /* Invert column permutation to find out where my columns should go */ 1606 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1607 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1608 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1609 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1610 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1611 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1612 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1613 1614 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1615 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1616 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1617 1618 /* Find out where my gcols should go */ 1619 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1620 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1621 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1622 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1623 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1624 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1625 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1626 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1627 1628 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1629 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1630 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1631 for (i=0; i<m; i++) { 1632 PetscInt row = rdest[i],rowner; 1633 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1634 for (j=ai[i]; j<ai[i+1]; j++) { 1635 PetscInt cowner,col = cdest[aj[j]]; 1636 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1637 if (rowner == cowner) dnnz[i]++; 1638 
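/*
   dnnz/onnz count, for each local source row, how many permuted entries fall
   in the diagonal versus off-diagonal block of its destination row; the counts
   are then moved through rowsf (tdnnz/tonnz) so that the processes owning the
   destination rows can preallocate Aperm.
*/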
else onnz[i]++; 1639 } 1640 for (j=bi[i]; j<bi[i+1]; j++) { 1641 PetscInt cowner,col = gcdest[bj[j]]; 1642 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1643 if (rowner == cowner) dnnz[i]++; 1644 else onnz[i]++; 1645 } 1646 } 1647 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1648 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1649 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1650 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1651 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1652 1653 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1654 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1655 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1656 for (i=0; i<m; i++) { 1657 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1658 PetscInt j0,rowlen; 1659 rowlen = ai[i+1] - ai[i]; 1660 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1661 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1662 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1663 } 1664 rowlen = bi[i+1] - bi[i]; 1665 for (j0=j=0; j<rowlen; j0=j) { 1666 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1667 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1668 } 1669 } 1670 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1671 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1672 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1673 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1674 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1675 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1676 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1677 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1678 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1679 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1680 *B = Aperm; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1686 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1687 { 1688 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1689 Mat A = mat->A,B = mat->B; 1690 PetscErrorCode ierr; 1691 PetscReal isend[5],irecv[5]; 1692 1693 PetscFunctionBegin; 1694 info->block_size = 1.0; 1695 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1696 1697 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1698 isend[3] = info->memory; isend[4] = info->mallocs; 1699 1700 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1701 1702 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1703 isend[3] += info->memory; isend[4] += info->mallocs; 1704 if (flag == MAT_LOCAL) { 1705 info->nz_used = isend[0]; 1706 info->nz_allocated = isend[1]; 1707 info->nz_unneeded = isend[2]; 1708 info->memory = isend[3]; 1709 info->mallocs = isend[4]; 1710 } else if (flag == MAT_GLOBAL_MAX) { 1711 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1712 1713 info->nz_used = irecv[0]; 1714 info->nz_allocated = irecv[1]; 1715 info->nz_unneeded = irecv[2]; 1716 info->memory = irecv[3]; 1717 info->mallocs = irecv[4]; 1718 } else 
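/*
   The per-process totals assembled in isend[] (diagonal plus off-diagonal
   part) are reduced with a max for MAT_GLOBAL_MAX and with a sum for
   MAT_GLOBAL_SUM below.  Illustrative call (not part of this file):
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
*/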
if (flag == MAT_GLOBAL_SUM) { 1719 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1720 1721 info->nz_used = irecv[0]; 1722 info->nz_allocated = irecv[1]; 1723 info->nz_unneeded = irecv[2]; 1724 info->memory = irecv[3]; 1725 info->mallocs = irecv[4]; 1726 } 1727 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1728 info->fill_ratio_needed = 0; 1729 info->factor_mallocs = 0; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetOption_MPIAIJ" 1735 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 switch (op) { 1742 case MAT_NEW_NONZERO_LOCATIONS: 1743 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1744 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1745 case MAT_KEEP_NONZERO_PATTERN: 1746 case MAT_NEW_NONZERO_LOCATION_ERR: 1747 case MAT_USE_INODES: 1748 case MAT_IGNORE_ZERO_ENTRIES: 1749 MatCheckPreallocated(A,1); 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1752 break; 1753 case MAT_ROW_ORIENTED: 1754 a->roworiented = flg; 1755 1756 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1757 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_NEW_DIAGONALS: 1760 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1761 break; 1762 case MAT_IGNORE_OFF_PROC_ENTRIES: 1763 a->donotstash = flg; 1764 break; 1765 case MAT_SPD: 1766 A->spd_set = PETSC_TRUE; 1767 A->spd = flg; 1768 if (flg) { 1769 A->symmetric = PETSC_TRUE; 1770 A->structurally_symmetric = PETSC_TRUE; 1771 A->symmetric_set = PETSC_TRUE; 1772 A->structurally_symmetric_set = PETSC_TRUE; 1773 } 1774 break; 1775 case MAT_SYMMETRIC: 1776 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1777 break; 1778 case MAT_STRUCTURALLY_SYMMETRIC: 1779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1780 break; 1781 case MAT_HERMITIAN: 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 case MAT_SYMMETRY_ETERNAL: 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 default: 1788 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1789 } 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatGetRow_MPIAIJ" 1795 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1796 { 1797 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1798 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1799 PetscErrorCode ierr; 1800 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1801 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1802 PetscInt *cmap,*idx_p; 1803 1804 PetscFunctionBegin; 1805 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1806 mat->getrowactive = PETSC_TRUE; 1807 1808 if (!mat->rowvalues && (idx || v)) { 1809 /* 1810 allocate enough space to hold information from the longest row. 
1811 */ 1812 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1813 PetscInt max = 1,tmp; 1814 for (i=0; i<matin->rmap->n; i++) { 1815 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1816 if (max < tmp) max = tmp; 1817 } 1818 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1819 } 1820 1821 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1822 lrow = row - rstart; 1823 1824 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1825 if (!v) {pvA = 0; pvB = 0;} 1826 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1827 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1828 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1829 nztot = nzA + nzB; 1830 1831 cmap = mat->garray; 1832 if (v || idx) { 1833 if (nztot) { 1834 /* Sort by increasing column numbers, assuming A and B already sorted */ 1835 PetscInt imark = -1; 1836 if (v) { 1837 *v = v_p = mat->rowvalues; 1838 for (i=0; i<nzB; i++) { 1839 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1840 else break; 1841 } 1842 imark = i; 1843 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1844 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1845 } 1846 if (idx) { 1847 *idx = idx_p = mat->rowindices; 1848 if (imark > -1) { 1849 for (i=0; i<imark; i++) { 1850 idx_p[i] = cmap[cworkB[i]]; 1851 } 1852 } else { 1853 for (i=0; i<nzB; i++) { 1854 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1855 else break; 1856 } 1857 imark = i; 1858 } 1859 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1860 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1861 } 1862 } else { 1863 if (idx) *idx = 0; 1864 if (v) *v = 0; 1865 } 1866 } 1867 *nz = nztot; 1868 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1869 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1875 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1876 { 1877 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1878 1879 PetscFunctionBegin; 1880 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1881 aij->getrowactive = PETSC_FALSE; 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatNorm_MPIAIJ" 1887 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1888 { 1889 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1890 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1891 PetscErrorCode ierr; 1892 PetscInt i,j,cstart = mat->cmap->rstart; 1893 PetscReal sum = 0.0; 1894 MatScalar *v; 1895 1896 PetscFunctionBegin; 1897 if (aij->size == 1) { 1898 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1899 } else { 1900 if (type == NORM_FROBENIUS) { 1901 v = amat->a; 1902 for (i=0; i<amat->nz; i++) { 1903 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1904 } 1905 v = bmat->a; 1906 for (i=0; i<bmat->nz; i++) { 1907 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1908 } 1909 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1910 *norm = PetscSqrtReal(*norm); 1911 } else if (type == NORM_1) { /* max column norm */ 1912 PetscReal *tmp,*tmp2; 1913 PetscInt *jj,*garray = aij->garray; 1914 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1915 ierr = 
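/*
   NORM_1 is the largest absolute column sum: each process accumulates its
   diagonal-block (amat) and off-diagonal-block (bmat) contributions into a
   dense array of length cmap->N, the arrays are summed across processes with
   MPI_Allreduce, and the maximum entry is taken.  Illustrative call (not part
   of this file; nrm is a PetscReal):
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
*/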
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1916 *norm = 0.0; 1917 v = amat->a; jj = amat->j; 1918 for (j=0; j<amat->nz; j++) { 1919 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1920 } 1921 v = bmat->a; jj = bmat->j; 1922 for (j=0; j<bmat->nz; j++) { 1923 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1924 } 1925 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1926 for (j=0; j<mat->cmap->N; j++) { 1927 if (tmp2[j] > *norm) *norm = tmp2[j]; 1928 } 1929 ierr = PetscFree(tmp);CHKERRQ(ierr); 1930 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1931 } else if (type == NORM_INFINITY) { /* max row norm */ 1932 PetscReal ntemp = 0.0; 1933 for (j=0; j<aij->A->rmap->n; j++) { 1934 v = amat->a + amat->i[j]; 1935 sum = 0.0; 1936 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1937 sum += PetscAbsScalar(*v); v++; 1938 } 1939 v = bmat->a + bmat->i[j]; 1940 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1941 sum += PetscAbsScalar(*v); v++; 1942 } 1943 if (sum > ntemp) ntemp = sum; 1944 } 1945 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1946 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatTranspose_MPIAIJ" 1953 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1954 { 1955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1956 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1957 PetscErrorCode ierr; 1958 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1959 PetscInt cstart = A->cmap->rstart,ncol; 1960 Mat B; 1961 MatScalar *array; 1962 1963 PetscFunctionBegin; 1964 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1965 1966 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1967 ai = Aloc->i; aj = Aloc->j; 1968 bi = Bloc->i; bj = Bloc->j; 1969 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1970 PetscInt *d_nnz,*g_nnz,*o_nnz; 1971 PetscSFNode *oloc; 1972 PETSC_UNUSED PetscSF sf; 1973 1974 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1975 /* compute d_nnz for preallocation */ 1976 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1977 for (i=0; i<ai[ma]; i++) { 1978 d_nnz[aj[i]]++; 1979 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1980 } 1981 /* compute local off-diagonal contributions */ 1982 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1983 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1984 /* map those to global */ 1985 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1986 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1987 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1988 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1989 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1990 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1991 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1992 1993 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1994 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1995 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1996 ierr = 
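/*
   Preallocation for the transpose: d_nnz[j] counts diagonal-block entries in
   local column j (these become local rows of B), while the off-diagonal
   column counts g_nnz are summed onto their owning processes with a PetscSF
   reduce to form o_nnz.  Illustrative call (not part of this file):
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/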
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1997 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1998 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1999 } else { 2000 B = *matout; 2001 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2002 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2003 } 2004 2005 /* copy over the A part */ 2006 array = Aloc->a; 2007 row = A->rmap->rstart; 2008 for (i=0; i<ma; i++) { 2009 ncol = ai[i+1]-ai[i]; 2010 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2011 row++; 2012 array += ncol; aj += ncol; 2013 } 2014 aj = Aloc->j; 2015 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2016 2017 /* copy over the B part */ 2018 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2019 array = Bloc->a; 2020 row = A->rmap->rstart; 2021 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2022 cols_tmp = cols; 2023 for (i=0; i<mb; i++) { 2024 ncol = bi[i+1]-bi[i]; 2025 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2026 row++; 2027 array += ncol; cols_tmp += ncol; 2028 } 2029 ierr = PetscFree(cols);CHKERRQ(ierr); 2030 2031 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2034 *matout = B; 2035 } else { 2036 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2037 } 2038 PetscFunctionReturn(0); 2039 } 2040 2041 #undef __FUNCT__ 2042 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2043 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2044 { 2045 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2046 Mat a = aij->A,b = aij->B; 2047 PetscErrorCode ierr; 2048 PetscInt s1,s2,s3; 2049 2050 PetscFunctionBegin; 2051 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2052 if (rr) { 2053 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2054 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2055 /* Overlap communication with computation. 
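   The scatter of rr into aij->lvec is started here and completed only after
   the left scaling of B and the scaling of the diagonal block, so the
   communication overlaps that work.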
*/ 2056 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2057 } 2058 if (ll) { 2059 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2060 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2061 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2062 } 2063 /* scale the diagonal block */ 2064 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2065 2066 if (rr) { 2067 /* Do a scatter end and then right scale the off-diagonal block */ 2068 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2076 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2077 { 2078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2079 PetscErrorCode ierr; 2080 2081 PetscFunctionBegin; 2082 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatEqual_MPIAIJ" 2088 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2089 { 2090 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2091 Mat a,b,c,d; 2092 PetscBool flg; 2093 PetscErrorCode ierr; 2094 2095 PetscFunctionBegin; 2096 a = matA->A; b = matA->B; 2097 c = matB->A; d = matB->B; 2098 2099 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2100 if (flg) { 2101 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2102 } 2103 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2104 PetscFunctionReturn(0); 2105 } 2106 2107 #undef __FUNCT__ 2108 #define __FUNCT__ "MatCopy_MPIAIJ" 2109 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2110 { 2111 PetscErrorCode ierr; 2112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2113 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2114 2115 PetscFunctionBegin; 2116 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2117 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2118 /* because of the column compression in the off-processor part of the matrix a->B, 2119 the number of columns in a->B and b->B may be different, hence we cannot call 2120 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2121 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2122 then copying the submatrices */ 2123 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2124 } else { 2125 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2126 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatSetUp_MPIAIJ" 2133 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2134 { 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2144 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2145 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2146 { 2147 PetscInt i,m=Y->rmap->N; 2148 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2149 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2150 const PetscInt *xi = x->i,*yi = y->i; 2151 2152 PetscFunctionBegin; 2153 /* Set the number of nonzeros in the new matrix */ 2154 for (i=0; i<m; i++) { 2155 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2156 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2157 nnz[i] = 0; 2158 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2159 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2160 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2161 nnz[i]++; 2162 } 2163 for (; k<nzy; k++) nnz[i]++; 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatAXPY_MPIAIJ" 2170 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2171 { 2172 PetscErrorCode ierr; 2173 PetscInt i; 2174 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2175 PetscBLASInt bnz,one=1; 2176 Mat_SeqAIJ *x,*y; 2177 2178 PetscFunctionBegin; 2179 if (str == SAME_NONZERO_PATTERN) { 2180 PetscScalar alpha = a; 2181 x = (Mat_SeqAIJ*)xx->A->data; 2182 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2183 y = (Mat_SeqAIJ*)yy->A->data; 2184 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2185 x = (Mat_SeqAIJ*)xx->B->data; 2186 y = (Mat_SeqAIJ*)yy->B->data; 2187 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2188 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2189 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2190 } else if (str == SUBSET_NONZERO_PATTERN) { 2191 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2192 2193 x = (Mat_SeqAIJ*)xx->B->data; 2194 y = (Mat_SeqAIJ*)yy->B->data; 2195 if (y->xtoy && y->XtoY != xx->B) { 2196 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2197 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2198 } 2199 if (!y->xtoy) { /* get xtoy */ 2200 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2201 y->XtoY = xx->B; 2202 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2203 } 2204 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2205 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2206 } else { 2207 Mat B; 2208 PetscInt *nnz_d,*nnz_o; 2209 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2210 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2211 ierr = 
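/*
   Different nonzero pattern: a new matrix B is preallocated for the union of
   the patterns of X and Y (nnz_d/nnz_o), Y + a*X is computed into it with
   MatAXPY_BasicWithPreallocation(), and B then replaces Y's internals via
   MatHeaderReplace().  Illustrative call (not part of this file):
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/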
MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2212 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2213 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2214 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2215 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2216 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2217 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2218 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2219 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2220 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2221 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2222 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2223 } 2224 PetscFunctionReturn(0); 2225 } 2226 2227 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "MatConjugate_MPIAIJ" 2231 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2232 { 2233 #if defined(PETSC_USE_COMPLEX) 2234 PetscErrorCode ierr; 2235 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2236 2237 PetscFunctionBegin; 2238 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2239 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2240 #else 2241 PetscFunctionBegin; 2242 #endif 2243 PetscFunctionReturn(0); 2244 } 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatRealPart_MPIAIJ" 2248 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2249 { 2250 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2251 PetscErrorCode ierr; 2252 2253 PetscFunctionBegin; 2254 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2255 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2261 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2262 { 2263 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2264 PetscErrorCode ierr; 2265 2266 PetscFunctionBegin; 2267 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2268 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #if defined(PETSC_HAVE_PBGL) 2273 2274 #include <boost/parallel/mpi/bsp_process_group.hpp> 2275 #include <boost/graph/distributed/ilu_default_graph.hpp> 2276 #include <boost/graph/distributed/ilu_0_block.hpp> 2277 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2278 #include <boost/graph/distributed/petsc/interface.hpp> 2279 #include <boost/multi_array.hpp> 2280 #include <boost/parallel/distributed_property_map->hpp> 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2284 /* 2285 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2286 */ 2287 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2288 { 2289 namespace petsc = boost::distributed::petsc; 2290 2291 namespace graph_dist = boost::graph::distributed; 2292 using boost::graph::distributed::ilu_default::process_group_type; 2293 using boost::graph::ilu_permuted; 2294 2295 PetscBool row_identity, col_identity; 2296 PetscContainer c; 2297 PetscInt m, n, M, N; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2302 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2303 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2304 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be 
identity for parallel ILU"); 2305 2306 process_group_type pg; 2307 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2308 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2309 lgraph_type& level_graph = *lgraph_p; 2310 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2311 2312 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2313 ilu_permuted(level_graph); 2314 2315 /* put together the new matrix */ 2316 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2317 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2318 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2319 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2320 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2321 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2322 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2323 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2324 2325 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2326 ierr = PetscContainerSetPointer(c, lgraph_p); 2327 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2328 ierr = PetscContainerDestroy(&c); 2329 PetscFunctionReturn(0); 2330 } 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2334 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2335 { 2336 PetscFunctionBegin; 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatSolve_MPIAIJ" 2342 /* 2343 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2344 */ 2345 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2346 { 2347 namespace graph_dist = boost::graph::distributed; 2348 2349 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2350 lgraph_type *lgraph_p; 2351 PetscContainer c; 2352 PetscErrorCode ierr; 2353 2354 PetscFunctionBegin; 2355 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2356 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2357 ierr = VecCopy(b, x);CHKERRQ(ierr); 2358 2359 PetscScalar *array_x; 2360 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2361 PetscInt sx; 2362 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2363 2364 PetscScalar *array_b; 2365 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2366 PetscInt sb; 2367 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2368 2369 lgraph_type& level_graph = *lgraph_p; 2370 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2371 2372 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2373 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2374 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2375 2376 typedef boost::iterator_property_map<array_ref_type::iterator, 2377 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2378 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2379 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2380 2381 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2382 PetscFunctionReturn(0); 2383 } 2384 #endif 2385 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2389 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2390 { 2391 PetscMPIInt rank,size; 2392 
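/*
   Interlaced redundant matrix: each process packs its local rows in CSR form
   (row pointers and column indices in sbuf_j, values in sbuf_a) and exchanges
   them with the matching ranks of the other subcommunicators, so every
   subcommunicator can assemble a complete copy of the global matrix.
*/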
MPI_Comm comm; 2393 PetscErrorCode ierr; 2394 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2395 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2396 PetscInt *rowrange = mat->rmap->range; 2397 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2398 Mat A = aij->A,B=aij->B,C=*matredundant; 2399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2400 PetscScalar *sbuf_a; 2401 PetscInt nzlocal=a->nz+b->nz; 2402 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2403 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2404 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2405 MatScalar *aworkA,*aworkB; 2406 PetscScalar *vals; 2407 PetscMPIInt tag1,tag2,tag3,imdex; 2408 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2409 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2410 MPI_Status recv_status,*send_status; 2411 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2412 PetscInt **rbuf_j=NULL; 2413 PetscScalar **rbuf_a=NULL; 2414 Mat_Redundant *redund =NULL; 2415 2416 PetscFunctionBegin; 2417 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2418 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2420 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2421 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2422 2423 if (reuse == MAT_REUSE_MATRIX) { 2424 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2425 if (subsize == 1) { 2426 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2427 redund = c->redundant; 2428 } else { 2429 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2430 redund = c->redundant; 2431 } 2432 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2433 2434 nsends = redund->nsends; 2435 nrecvs = redund->nrecvs; 2436 send_rank = redund->send_rank; 2437 recv_rank = redund->recv_rank; 2438 sbuf_nz = redund->sbuf_nz; 2439 rbuf_nz = redund->rbuf_nz; 2440 sbuf_j = redund->sbuf_j; 2441 sbuf_a = redund->sbuf_a; 2442 rbuf_j = redund->rbuf_j; 2443 rbuf_a = redund->rbuf_a; 2444 } 2445 2446 if (reuse == MAT_INITIAL_MATRIX) { 2447 PetscInt nleftover,np_subcomm; 2448 2449 /* get the destination processors' id send_rank, nsends and nrecvs */ 2450 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2451 2452 np_subcomm = size/nsubcomm; 2453 nleftover = size - nsubcomm*np_subcomm; 2454 2455 /* block of codes below is specific for INTERLACED */ 2456 /* ------------------------------------------------*/ 2457 nsends = 0; nrecvs = 0; 2458 for (i=0; i<size; i++) { 2459 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2460 send_rank[nsends++] = i; 2461 recv_rank[nrecvs++] = i; 2462 } 2463 } 2464 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2465 i = size-nleftover-1; 2466 j = 0; 2467 while (j < nsubcomm - nleftover) { 2468 send_rank[nsends++] = i; 2469 i--; j++; 2470 } 2471 } 2472 2473 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2474 for (i=0; i<nleftover; i++) { 2475 recv_rank[nrecvs++] = size-nleftover+i; 2476 } 2477 } 2478 /*----------------------------------------------*/ 2479 2480 /* allocate sbuf_j, sbuf_a */ 2481 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2482 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2483 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2484 /* 2485 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2486 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2487 */ 2488 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2489 2490 /* copy mat's local entries into the buffers */ 2491 if (reuse == MAT_INITIAL_MATRIX) { 2492 rownz_max = 0; 2493 rptr = sbuf_j; 2494 cols = sbuf_j + rend-rstart + 1; 2495 vals = sbuf_a; 2496 rptr[0] = 0; 2497 for (i=0; i<rend-rstart; i++) { 2498 row = i + rstart; 2499 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2500 ncols = nzA + nzB; 2501 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2502 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2503 /* load the column indices for this row into cols */ 2504 lwrite = 0; 2505 for (l=0; l<nzB; l++) { 2506 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2507 vals[lwrite] = aworkB[l]; 2508 cols[lwrite++] = ctmp; 2509 } 2510 } 2511 for (l=0; l<nzA; l++) { 2512 vals[lwrite] = aworkA[l]; 2513 cols[lwrite++] = cstart + cworkA[l]; 2514 } 2515 for (l=0; l<nzB; l++) { 2516 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2517 vals[lwrite] = aworkB[l]; 2518 cols[lwrite++] = ctmp; 2519 } 2520 } 2521 vals += ncols; 2522 cols += ncols; 2523 rptr[i+1] = rptr[i] + ncols; 2524 if (rownz_max < ncols) rownz_max = ncols; 2525 } 2526 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2527 } else { /* only copy matrix values into sbuf_a */ 2528 rptr = sbuf_j; 2529 vals = sbuf_a; 2530 rptr[0] = 0; 2531 for (i=0; i<rend-rstart; i++) { 2532 row = i + rstart; 2533 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2534 ncols = nzA + nzB; 2535 cworkB = b->j + b->i[i]; 2536 aworkA = a->a + a->i[i]; 2537 aworkB = b->a + b->i[i]; 2538 lwrite = 0; 2539 for (l=0; 
l<nzB; l++) { 2540 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2541 } 2542 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2543 for (l=0; l<nzB; l++) { 2544 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2545 } 2546 vals += ncols; 2547 rptr[i+1] = rptr[i] + ncols; 2548 } 2549 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2550 2551 /* send nzlocal to others, and recv other's nzlocal */ 2552 /*--------------------------------------------------*/ 2553 if (reuse == MAT_INITIAL_MATRIX) { 2554 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2555 2556 s_waits2 = s_waits3 + nsends; 2557 s_waits1 = s_waits2 + nsends; 2558 r_waits1 = s_waits1 + nsends; 2559 r_waits2 = r_waits1 + nrecvs; 2560 r_waits3 = r_waits2 + nrecvs; 2561 } else { 2562 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2563 2564 r_waits3 = s_waits3 + nsends; 2565 } 2566 2567 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2568 if (reuse == MAT_INITIAL_MATRIX) { 2569 /* get new tags to keep the communication clean */ 2570 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2571 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2572 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2573 2574 /* post receives of other's nzlocal */ 2575 for (i=0; i<nrecvs; i++) { 2576 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2577 } 2578 /* send nzlocal to others */ 2579 for (i=0; i<nsends; i++) { 2580 sbuf_nz[i] = nzlocal; 2581 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2582 } 2583 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2584 count = nrecvs; 2585 while (count) { 2586 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2587 2588 recv_rank[imdex] = recv_status.MPI_SOURCE; 2589 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2590 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2591 2592 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2593 2594 rbuf_nz[imdex] += i + 2; 2595 2596 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2597 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2598 count--; 2599 } 2600 /* wait on sends of nzlocal */ 2601 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2602 /* send mat->i,j to others, and recv from other's */ 2603 /*------------------------------------------------*/ 2604 for (i=0; i<nsends; i++) { 2605 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2606 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2607 } 2608 /* wait on receives of mat->i,j */ 2609 /*------------------------------*/ 2610 count = nrecvs; 2611 while (count) { 2612 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2613 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2614 count--; 2615 } 2616 /* wait on sends of mat->i,j */ 2617 /*---------------------------*/ 2618 if (nsends) { 2619 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2620 } 2621 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2622 2623 /* post 
receives, send and receive mat->a */ 2624 /*----------------------------------------*/ 2625 for (imdex=0; imdex<nrecvs; imdex++) { 2626 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2627 } 2628 for (i=0; i<nsends; i++) { 2629 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2630 } 2631 count = nrecvs; 2632 while (count) { 2633 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2634 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2635 count--; 2636 } 2637 if (nsends) { 2638 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2639 } 2640 2641 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2642 2643 /* create redundant matrix */ 2644 /*-------------------------*/ 2645 if (reuse == MAT_INITIAL_MATRIX) { 2646 const PetscInt *range; 2647 PetscInt rstart_sub,rend_sub,mloc_sub; 2648 2649 /* compute rownz_max for preallocation */ 2650 for (imdex=0; imdex<nrecvs; imdex++) { 2651 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2652 rptr = rbuf_j[imdex]; 2653 for (i=0; i<j; i++) { 2654 ncols = rptr[i+1] - rptr[i]; 2655 if (rownz_max < ncols) rownz_max = ncols; 2656 } 2657 } 2658 2659 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2660 2661 /* get local size of redundant matrix 2662 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2663 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2664 rstart_sub = range[nsubcomm*subrank]; 2665 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2666 rend_sub = range[nsubcomm*(subrank+1)]; 2667 } else { 2668 rend_sub = mat->rmap->N; 2669 } 2670 mloc_sub = rend_sub - rstart_sub; 2671 2672 if (M == N) { 2673 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2674 } else { /* non-square matrix */ 2675 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2676 } 2677 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2678 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2679 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2680 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2681 } else { 2682 C = *matredundant; 2683 } 2684 2685 /* insert local matrix entries */ 2686 rptr = sbuf_j; 2687 cols = sbuf_j + rend-rstart + 1; 2688 vals = sbuf_a; 2689 for (i=0; i<rend-rstart; i++) { 2690 row = i + rstart; 2691 ncols = rptr[i+1] - rptr[i]; 2692 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2693 vals += ncols; 2694 cols += ncols; 2695 } 2696 /* insert received matrix entries */ 2697 for (imdex=0; imdex<nrecvs; imdex++) { 2698 rstart = rowrange[recv_rank[imdex]]; 2699 rend = rowrange[recv_rank[imdex]+1]; 2700 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2701 rptr = rbuf_j[imdex]; 2702 cols = rbuf_j[imdex] + rend-rstart + 1; 2703 vals = rbuf_a[imdex]; 2704 for (i=0; i<rend-rstart; i++) { 2705 row = i + rstart; 2706 ncols = rptr[i+1] - rptr[i]; 2707 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2708 vals += ncols; 2709 cols += ncols; 2710 } 2711 } 2712 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 2715 if (reuse == MAT_INITIAL_MATRIX) { 2716 *matredundant = C; 2717 2718 /* create a 
supporting struct and attach it to C for reuse */ 2719 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2720 if (subsize == 1) { 2721 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2722 c->redundant = redund; 2723 } else { 2724 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2725 c->redundant = redund; 2726 } 2727 2728 redund->nzlocal = nzlocal; 2729 redund->nsends = nsends; 2730 redund->nrecvs = nrecvs; 2731 redund->send_rank = send_rank; 2732 redund->recv_rank = recv_rank; 2733 redund->sbuf_nz = sbuf_nz; 2734 redund->rbuf_nz = rbuf_nz; 2735 redund->sbuf_j = sbuf_j; 2736 redund->sbuf_a = sbuf_a; 2737 redund->rbuf_j = rbuf_j; 2738 redund->rbuf_a = rbuf_a; 2739 redund->psubcomm = NULL; 2740 } 2741 PetscFunctionReturn(0); 2742 } 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2746 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2747 { 2748 PetscErrorCode ierr; 2749 MPI_Comm comm; 2750 PetscMPIInt size,subsize; 2751 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2752 Mat_Redundant *redund=NULL; 2753 PetscSubcomm psubcomm=NULL; 2754 MPI_Comm subcomm_in=subcomm; 2755 Mat *matseq; 2756 IS isrow,iscol; 2757 2758 PetscFunctionBegin; 2759 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2760 if (reuse == MAT_INITIAL_MATRIX) { 2761 /* create psubcomm, then get subcomm */ 2762 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2763 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2764 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2765 2766 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2767 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2768 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2769 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2770 subcomm = psubcomm->comm; 2771 } else { /* retrieve psubcomm and subcomm */ 2772 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2773 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2774 if (subsize == 1) { 2775 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2776 redund = c->redundant; 2777 } else { 2778 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2779 redund = c->redundant; 2780 } 2781 psubcomm = redund->psubcomm; 2782 } 2783 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2784 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2785 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2786 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2787 if (subsize == 1) { 2788 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2789 c->redundant->psubcomm = psubcomm; 2790 } else { 2791 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2792 c->redundant->psubcomm = psubcomm ; 2793 } 2794 } 2795 PetscFunctionReturn(0); 2796 } 2797 } 2798 2799 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2800 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2801 if (reuse == MAT_INITIAL_MATRIX) { 2802 /* create a local sequential matrix matseq[0] */ 2803 mloc_sub = PETSC_DECIDE; 2804 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2805 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2806 rstart = rend - mloc_sub; 2807 ierr = 
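/*
   General (non-interlaced) path: each process pulls the rows it will own in
   the subcommunicator as a sequential matrix via MatGetSubMatrices() (isrow is
   a stride IS over that row range, iscol spans all N columns) and the pieces
   are then combined with MatCreateMPIAIJConcatenateSeqAIJ().
*/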
ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2808 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2809 } else { /* reuse == MAT_REUSE_MATRIX */ 2810 if (subsize == 1) { 2811 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2812 redund = c->redundant; 2813 } else { 2814 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2815 redund = c->redundant; 2816 } 2817 2818 isrow = redund->isrow; 2819 iscol = redund->iscol; 2820 matseq = redund->matseq; 2821 } 2822 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2823 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2824 2825 if (reuse == MAT_INITIAL_MATRIX) { 2826 /* create a supporting struct and attach it to C for reuse */ 2827 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2828 if (subsize == 1) { 2829 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2830 c->redundant = redund; 2831 } else { 2832 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2833 c->redundant = redund; 2834 } 2835 redund->isrow = isrow; 2836 redund->iscol = iscol; 2837 redund->matseq = matseq; 2838 redund->psubcomm = psubcomm; 2839 } 2840 PetscFunctionReturn(0); 2841 } 2842 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2845 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2846 { 2847 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2848 PetscErrorCode ierr; 2849 PetscInt i,*idxb = 0; 2850 PetscScalar *va,*vb; 2851 Vec vtmp; 2852 2853 PetscFunctionBegin; 2854 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2855 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2856 if (idx) { 2857 for (i=0; i<A->rmap->n; i++) { 2858 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2859 } 2860 } 2861 2862 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2863 if (idx) { 2864 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2865 } 2866 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2867 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2868 2869 for (i=0; i<A->rmap->n; i++) { 2870 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2871 va[i] = vb[i]; 2872 if (idx) idx[i] = a->garray[idxb[i]]; 2873 } 2874 } 2875 2876 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2877 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2878 ierr = PetscFree(idxb);CHKERRQ(ierr); 2879 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2880 PetscFunctionReturn(0); 2881 } 2882 2883 #undef __FUNCT__ 2884 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2885 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2886 { 2887 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2888 PetscErrorCode ierr; 2889 PetscInt i,*idxb = 0; 2890 PetscScalar *va,*vb; 2891 Vec vtmp; 2892 2893 PetscFunctionBegin; 2894 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2895 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2896 if (idx) { 2897 for (i=0; i<A->cmap->n; i++) { 2898 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2899 } 2900 } 2901 2902 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2903 if (idx) { 2904 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2905 } 2906 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2907 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2908 2909 for (i=0; i<A->rmap->n; i++) { 2910 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2911 va[i] = vb[i]; 2912 if (idx) idx[i] = a->garray[idxb[i]]; 2913 } 2914 } 2915 2916 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2917 ierr = 
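/*
   MatGetRowMaxAbs and MatGetRowMinAbs above share the same pattern: the
   candidate from the diagonal block is compared with the one from the
   off-diagonal block, and off-diagonal indices are mapped to global column
   numbers through a->garray.  Illustrative call (not part of this file):
     ierr = MatGetRowMinAbs(A,v,idx);CHKERRQ(ierr);
*/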
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2918 ierr = PetscFree(idxb);CHKERRQ(ierr); 2919 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2920 PetscFunctionReturn(0); 2921 } 2922 2923 #undef __FUNCT__ 2924 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2925 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2926 { 2927 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2928 PetscInt n = A->rmap->n; 2929 PetscInt cstart = A->cmap->rstart; 2930 PetscInt *cmap = mat->garray; 2931 PetscInt *diagIdx, *offdiagIdx; 2932 Vec diagV, offdiagV; 2933 PetscScalar *a, *diagA, *offdiagA; 2934 PetscInt r; 2935 PetscErrorCode ierr; 2936 2937 PetscFunctionBegin; 2938 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2939 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2940 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2941 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2942 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2943 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2944 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2945 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2946 for (r = 0; r < n; ++r) { 2947 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2948 a[r] = diagA[r]; 2949 idx[r] = cstart + diagIdx[r]; 2950 } else { 2951 a[r] = offdiagA[r]; 2952 idx[r] = cmap[offdiagIdx[r]]; 2953 } 2954 } 2955 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2956 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2957 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2958 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2959 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2960 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2966 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2967 { 2968 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2969 PetscInt n = A->rmap->n; 2970 PetscInt cstart = A->cmap->rstart; 2971 PetscInt *cmap = mat->garray; 2972 PetscInt *diagIdx, *offdiagIdx; 2973 Vec diagV, offdiagV; 2974 PetscScalar *a, *diagA, *offdiagA; 2975 PetscInt r; 2976 PetscErrorCode ierr; 2977 2978 PetscFunctionBegin; 2979 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2980 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2981 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2982 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2983 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2984 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2985 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2986 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2987 for (r = 0; r < n; ++r) { 2988 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2989 a[r] = diagA[r]; 2990 idx[r] = cstart + diagIdx[r]; 2991 } else { 2992 a[r] = offdiagA[r]; 2993 idx[r] = cmap[offdiagIdx[r]]; 2994 } 2995 } 2996 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2997 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2998 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2999 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3000 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3001 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3002 PetscFunctionReturn(0); 3003 } 3004 3005 #undef __FUNCT__ 3006 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3007 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3008 { 3009 PetscErrorCode ierr; 
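  /* Gathers the complete nonzero pattern of the parallel matrix onto every process as a single
     sequential matrix; MatGetSubMatrix_MPIAIJ_All() below is called with MAT_DO_NOT_GET_VALUES,
     so only the structure (not the numerical values) is collected. */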
3010 Mat *dummy; 3011 3012 PetscFunctionBegin; 3013 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3014 *newmat = *dummy; 3015 ierr = PetscFree(dummy);CHKERRQ(ierr); 3016 PetscFunctionReturn(0); 3017 } 3018 3019 #undef __FUNCT__ 3020 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3021 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3022 { 3023 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3033 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3034 { 3035 PetscErrorCode ierr; 3036 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3037 3038 PetscFunctionBegin; 3039 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3040 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3041 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3042 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3043 PetscFunctionReturn(0); 3044 } 3045 3046 /* -------------------------------------------------------------------*/ 3047 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3048 MatGetRow_MPIAIJ, 3049 MatRestoreRow_MPIAIJ, 3050 MatMult_MPIAIJ, 3051 /* 4*/ MatMultAdd_MPIAIJ, 3052 MatMultTranspose_MPIAIJ, 3053 MatMultTransposeAdd_MPIAIJ, 3054 #if defined(PETSC_HAVE_PBGL) 3055 MatSolve_MPIAIJ, 3056 #else 3057 0, 3058 #endif 3059 0, 3060 0, 3061 /*10*/ 0, 3062 0, 3063 0, 3064 MatSOR_MPIAIJ, 3065 MatTranspose_MPIAIJ, 3066 /*15*/ MatGetInfo_MPIAIJ, 3067 MatEqual_MPIAIJ, 3068 MatGetDiagonal_MPIAIJ, 3069 MatDiagonalScale_MPIAIJ, 3070 MatNorm_MPIAIJ, 3071 /*20*/ MatAssemblyBegin_MPIAIJ, 3072 MatAssemblyEnd_MPIAIJ, 3073 MatSetOption_MPIAIJ, 3074 MatZeroEntries_MPIAIJ, 3075 /*24*/ MatZeroRows_MPIAIJ, 3076 0, 3077 #if defined(PETSC_HAVE_PBGL) 3078 0, 3079 #else 3080 0, 3081 #endif 3082 0, 3083 0, 3084 /*29*/ MatSetUp_MPIAIJ, 3085 #if defined(PETSC_HAVE_PBGL) 3086 0, 3087 #else 3088 0, 3089 #endif 3090 0, 3091 0, 3092 0, 3093 /*34*/ MatDuplicate_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*39*/ MatAXPY_MPIAIJ, 3099 MatGetSubMatrices_MPIAIJ, 3100 MatIncreaseOverlap_MPIAIJ, 3101 MatGetValues_MPIAIJ, 3102 MatCopy_MPIAIJ, 3103 /*44*/ MatGetRowMax_MPIAIJ, 3104 MatScale_MPIAIJ, 3105 0, 3106 0, 3107 MatZeroRowsColumns_MPIAIJ, 3108 /*49*/ MatSetRandom_MPIAIJ, 3109 0, 3110 0, 3111 0, 3112 0, 3113 /*54*/ MatFDColoringCreate_MPIXAIJ, 3114 0, 3115 MatSetUnfactored_MPIAIJ, 3116 MatPermute_MPIAIJ, 3117 0, 3118 /*59*/ MatGetSubMatrix_MPIAIJ, 3119 MatDestroy_MPIAIJ, 3120 MatView_MPIAIJ, 3121 0, 3122 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3123 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3124 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3125 0, 3126 0, 3127 0, 3128 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3129 MatGetRowMinAbs_MPIAIJ, 3130 0, 3131 MatSetColoring_MPIAIJ, 3132 0, 3133 MatSetValuesAdifor_MPIAIJ, 3134 /*75*/ MatFDColoringApply_AIJ, 3135 0, 3136 0, 3137 0, 3138 MatFindZeroDiagonals_MPIAIJ, 3139 /*80*/ 0, 3140 0, 3141 0, 3142 /*83*/ MatLoad_MPIAIJ, 3143 0, 3144 0, 3145 0, 3146 0, 3147 0, 3148 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3149 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3150 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3151 MatPtAP_MPIAIJ_MPIAIJ, 3152 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3153 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3154 0, 3155 0, 3156 0, 3157 0, 3158 /*99*/ 0, 3159 0, 3160 0, 3161 MatConjugate_MPIAIJ, 3162 0, 3163 
/*104*/MatSetValuesRow_MPIAIJ, 3164 MatRealPart_MPIAIJ, 3165 MatImaginaryPart_MPIAIJ, 3166 0, 3167 0, 3168 /*109*/0, 3169 MatGetRedundantMatrix_MPIAIJ, 3170 MatGetRowMin_MPIAIJ, 3171 0, 3172 0, 3173 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3174 0, 3175 0, 3176 0, 3177 0, 3178 /*119*/0, 3179 0, 3180 0, 3181 0, 3182 MatGetMultiProcBlock_MPIAIJ, 3183 /*124*/MatFindNonzeroRows_MPIAIJ, 3184 MatGetColumnNorms_MPIAIJ, 3185 MatInvertBlockDiagonal_MPIAIJ, 3186 0, 3187 MatGetSubMatricesParallel_MPIAIJ, 3188 /*129*/0, 3189 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3190 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3191 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3192 0, 3193 /*134*/0, 3194 0, 3195 0, 3196 0, 3197 0, 3198 /*139*/0, 3199 0, 3200 0, 3201 MatFDColoringSetUp_MPIXAIJ 3202 }; 3203 3204 /* ----------------------------------------------------------------------------------------*/ 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3208 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3209 { 3210 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3215 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3221 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3222 { 3223 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3224 PetscErrorCode ierr; 3225 3226 PetscFunctionBegin; 3227 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3228 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3234 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3235 { 3236 Mat_MPIAIJ *b; 3237 PetscErrorCode ierr; 3238 3239 PetscFunctionBegin; 3240 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3241 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3242 b = (Mat_MPIAIJ*)B->data; 3243 3244 if (!B->preallocated) { 3245 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 3246 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3247 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3248 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3249 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3250 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3251 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3252 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3253 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3254 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3255 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3256 } 3257 3258 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3259 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3260 B->preallocated = PETSC_TRUE; 3261 PetscFunctionReturn(0); 3262 } 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3266 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3267 { 3268 Mat mat; 3269 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3270 PetscErrorCode ierr; 3271 3272 PetscFunctionBegin; 3273 *newmat = 0; 3274 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3275 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3276 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3277 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3278 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3279 a = (Mat_MPIAIJ*)mat->data; 3280 3281 mat->factortype = matin->factortype; 3282 mat->assembled = PETSC_TRUE; 3283 mat->insertmode = NOT_SET_VALUES; 3284 mat->preallocated = PETSC_TRUE; 3285 3286 a->size = oldmat->size; 3287 a->rank = oldmat->rank; 3288 a->donotstash = oldmat->donotstash; 3289 a->roworiented = oldmat->roworiented; 3290 a->rowindices = 0; 3291 a->rowvalues = 0; 3292 a->getrowactive = PETSC_FALSE; 3293 3294 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3295 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3296 3297 if (oldmat->colmap) { 3298 #if defined(PETSC_USE_CTABLE) 3299 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3300 #else 3301 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3302 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3303 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3304 #endif 3305 } else a->colmap = 0; 3306 if (oldmat->garray) { 3307 PetscInt len; 3308 len = oldmat->B->cmap->n; 3309 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3310 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3311 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3312 } else a->garray = 0; 3313 3314 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3316 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3318 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3319 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3320 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3321 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3322 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3323 *newmat = mat; 3324 PetscFunctionReturn(0); 3325 } 3326 3327 3328 3329 #undef __FUNCT__ 3330 #define __FUNCT__ "MatLoad_MPIAIJ" 3331 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3332 { 3333 PetscScalar *vals,*svals; 3334 MPI_Comm comm; 3335 PetscErrorCode ierr; 3336 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3337 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3338 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3339 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3340 PetscInt cend,cstart,n,*rowners,sizesset=1; 3341 int fd; 3342 PetscInt bs = 1; 3343 3344 PetscFunctionBegin; 3345 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3346 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3347 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3348 if (!rank) { 3349 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3350 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3351 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3352 } 3353 3354 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3355 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3356 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3357 3358 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3359 3360 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3361 M = header[1]; N = header[2]; 3362 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3363 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3364 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3365 3366 /* If global sizes are set, check if they are consistent with that given in the file */ 3367 if (sizesset) { 3368 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3369 } 3370 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3371 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3372 3373 /* determine ownership of all (block) rows */ 3374 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3375 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3376 else m = newMat->rmap->n; /* Set by user */ 3377 3378 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3379 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3380 3381 /* First process needs enough room for process with most rows */ 3382 if (!rank) { 3383 mmax = rowners[1]; 3384 for (i=2; i<=size; i++) { 3385 mmax = PetscMax(mmax, rowners[i]); 3386 } 3387 } else mmax = -1; /* unused, but compilers complain */ 3388 3389 rowners[0] = 0; 3390 for (i=2; i<=size; i++) { 3391 rowners[i] += rowners[i-1]; 3392 } 3393 rstart = rowners[rank]; 3394 rend = rowners[rank+1]; 3395 3396 /* distribute row lengths to all processors */ 3397 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3398 if (!rank) { 3399 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3400 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3401 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3402 for (j=0; j<m; j++) { 3403 procsnz[0] += ourlens[j]; 3404 } 3405 for (i=1; i<size; i++) { 3406 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3407 /* calculate the number of nonzeros on each processor */ 3408 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3409 procsnz[i] += rowlengths[j]; 3410 } 3411 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3412 } 3413 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3414 } else { 3415 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3416 } 3417 3418 if (!rank) { 3419 /* determine max buffer needed and allocate it */ 3420 maxnz = 0; 3421 for (i=0; i<size; i++) { 3422 maxnz = PetscMax(maxnz,procsnz[i]); 3423 } 3424 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3425 3426 /* read in my part of the matrix column indices */ 3427 nz = procsnz[0]; 3428 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3429 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3430 3431 /* read in every one elses and ship off */ 3432 for (i=1; i<size; i++) { 3433 nz = procsnz[i]; 3434 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3435 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3436 } 3437 ierr = PetscFree(cols);CHKERRQ(ierr); 3438 } else { 3439 /* determine buffer space needed for message */ 3440 nz = 0; 3441 for (i=0; i<m; i++) { 3442 nz += ourlens[i]; 3443 } 3444 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3445 3446 /* receive message of column indices*/ 3447 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3448 } 3449 3450 /* determine column ownership if matrix is not square */ 3451 if (N != M) { 3452 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3453 else n = newMat->cmap->n; 3454 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3455 cstart = cend - n; 3456 } else { 3457 cstart = rstart; 3458 cend = rend; 3459 n = cend - cstart; 3460 } 3461 3462 /* loop over local rows, determining number of off diagonal entries */ 3463 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3464 jj = 0; 3465 for (i=0; i<m; i++) { 3466 for (j=0; j<ourlens[i]; j++) { 3467 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3468 jj++; 3469 } 3470 } 3471 3472 for (i=0; i<m; i++) { 3473 ourlens[i] -= offlens[i]; 3474 } 3475 if (!sizesset) { 3476 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3477 } 3478 3479 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3480 3481 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3482 3483 for (i=0; i<m; i++) { 3484 ourlens[i] += offlens[i]; 3485 } 3486 3487 if (!rank) { 3488 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3489 3490 /* read in my part of the matrix numerical values */ 3491 nz = procsnz[0]; 3492 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3493 3494 /* insert into matrix */ 3495 jj = rstart; 3496 smycols = mycols; 3497 svals = vals; 3498 for (i=0; i<m; i++) { 3499 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3500 smycols += ourlens[i]; 3501 svals += ourlens[i]; 3502 jj++; 3503 } 3504 3505 /* read in other processors and ship out */ 3506 for (i=1; i<size; i++) { 3507 nz = 
procsnz[i]; 3508 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3509 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3510 } 3511 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3512 } else { 3513 /* receive numeric values */ 3514 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3515 3516 /* receive message of values*/ 3517 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3518 3519 /* insert into matrix */ 3520 jj = rstart; 3521 smycols = mycols; 3522 svals = vals; 3523 for (i=0; i<m; i++) { 3524 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3525 smycols += ourlens[i]; 3526 svals += ourlens[i]; 3527 jj++; 3528 } 3529 } 3530 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3531 ierr = PetscFree(vals);CHKERRQ(ierr); 3532 ierr = PetscFree(mycols);CHKERRQ(ierr); 3533 ierr = PetscFree(rowners);CHKERRQ(ierr); 3534 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3535 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 #undef __FUNCT__ 3540 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3541 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3542 { 3543 PetscErrorCode ierr; 3544 IS iscol_local; 3545 PetscInt csize; 3546 3547 PetscFunctionBegin; 3548 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3549 if (call == MAT_REUSE_MATRIX) { 3550 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3551 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3552 } else { 3553 PetscInt cbs; 3554 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3555 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3556 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3557 } 3558 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3559 if (call == MAT_INITIAL_MATRIX) { 3560 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3561 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3562 } 3563 PetscFunctionReturn(0); 3564 } 3565 3566 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3567 #undef __FUNCT__ 3568 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3569 /* 3570 Not great since it makes two copies of the submatrix, first an SeqAIJ 3571 in local and then by concatenating the local matrices the end result. 3572 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3573 3574 Note: This requires a sequential iscol with all indices. 
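    A sketch of the calling pattern used by MatGetSubMatrix_MPIAIJ() above (illustrative only,
    error checking omitted): every process first gathers the full column index set, then calls
    this routine:
       ISAllGather(iscol,&iscol_local);
       MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);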
3575 */ 3576 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3577 { 3578 PetscErrorCode ierr; 3579 PetscMPIInt rank,size; 3580 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3581 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3582 PetscBool allcolumns, colflag; 3583 Mat M,Mreuse; 3584 MatScalar *vwork,*aa; 3585 MPI_Comm comm; 3586 Mat_SeqAIJ *aij; 3587 3588 PetscFunctionBegin; 3589 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3590 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3592 3593 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3594 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3595 if (colflag && ncol == mat->cmap->N) { 3596 allcolumns = PETSC_TRUE; 3597 } else { 3598 allcolumns = PETSC_FALSE; 3599 } 3600 if (call == MAT_REUSE_MATRIX) { 3601 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3602 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3603 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3604 } else { 3605 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3606 } 3607 3608 /* 3609 m - number of local rows 3610 n - number of columns (same on all processors) 3611 rstart - first row in new global matrix generated 3612 */ 3613 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3614 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3615 if (call == MAT_INITIAL_MATRIX) { 3616 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3617 ii = aij->i; 3618 jj = aij->j; 3619 3620 /* 3621 Determine the number of non-zeros in the diagonal and off-diagonal 3622 portions of the matrix in order to do correct preallocation 3623 */ 3624 3625 /* first get start and end of "diagonal" columns */ 3626 if (csize == PETSC_DECIDE) { 3627 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3628 if (mglobal == n) { /* square matrix */ 3629 nlocal = m; 3630 } else { 3631 nlocal = n/size + ((n % size) > rank); 3632 } 3633 } else { 3634 nlocal = csize; 3635 } 3636 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3637 rstart = rend - nlocal; 3638 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3639 3640 /* next, compute all the lengths */ 3641 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3642 olens = dlens + m; 3643 for (i=0; i<m; i++) { 3644 jend = ii[i+1] - ii[i]; 3645 olen = 0; 3646 dlen = 0; 3647 for (j=0; j<jend; j++) { 3648 if (*jj < rstart || *jj >= rend) olen++; 3649 else dlen++; 3650 jj++; 3651 } 3652 olens[i] = olen; 3653 dlens[i] = dlen; 3654 } 3655 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3656 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3657 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3658 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3659 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3660 ierr = PetscFree(dlens);CHKERRQ(ierr); 3661 } else { 3662 PetscInt ml,nl; 3663 3664 M = *newmat; 3665 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3666 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3667 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3668 /* 3669 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3670 rather than the slower MatSetValues(). 3671 */ 3672 M->was_assembled = PETSC_TRUE; 3673 M->assembled = PETSC_FALSE; 3674 } 3675 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3676 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3677 ii = aij->i; 3678 jj = aij->j; 3679 aa = aij->a; 3680 for (i=0; i<m; i++) { 3681 row = rstart + i; 3682 nz = ii[i+1] - ii[i]; 3683 cwork = jj; jj += nz; 3684 vwork = aa; aa += nz; 3685 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3686 } 3687 3688 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3690 *newmat = M; 3691 3692 /* save submatrix used in processor for next request */ 3693 if (call == MAT_INITIAL_MATRIX) { 3694 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3695 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3696 } 3697 PetscFunctionReturn(0); 3698 } 3699 3700 #undef __FUNCT__ 3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3702 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3703 { 3704 PetscInt m,cstart, cend,j,nnz,i,d; 3705 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3706 const PetscInt *JJ; 3707 PetscScalar *values; 3708 PetscErrorCode ierr; 3709 3710 PetscFunctionBegin; 3711 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3712 3713 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3714 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3715 m = B->rmap->n; 3716 cstart = B->cmap->rstart; 3717 cend = B->cmap->rend; 3718 rstart = B->rmap->rstart; 3719 3720 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3721 3722 #if defined(PETSC_USE_DEBUG) 3723 for (i=0; i<m; i++) { 3724 nnz = Ii[i+1]- Ii[i]; 3725 JJ = J + Ii[i]; 3726 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3727 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3728 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3729 } 3730 #endif 3731 3732 for (i=0; i<m; i++) { 3733 nnz = Ii[i+1]- Ii[i]; 3734 JJ = J + Ii[i]; 3735 nnz_max = PetscMax(nnz_max,nnz); 3736 d = 0; 3737 for (j=0; j<nnz; j++) { 3738 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3739 } 3740 d_nnz[i] = d; 3741 o_nnz[i] = nnz - d; 3742 } 3743 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3744 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3745 3746 if (v) values = (PetscScalar*)v; 3747 else { 3748 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3749 } 3750 3751 for (i=0; i<m; i++) { 3752 ii = i + rstart; 3753 nnz = Ii[i+1]- Ii[i]; 3754 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3755 } 3756 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3757 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3758 3759 if (!v) { 3760 ierr = PetscFree(values);CHKERRQ(ierr); 3761 } 3762 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3763 PetscFunctionReturn(0); 3764 } 3765 3766 #undef __FUNCT__ 3767 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3768 /*@ 3769 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3770 (the default parallel PETSc format). 3771 3772 Collective on MPI_Comm 3773 3774 Input Parameters: 3775 + B - the matrix 3776 . i - the indices into j for the start of each local row (starts with zero) 3777 . j - the column indices for each local row (starts with zero) 3778 - v - optional values in the matrix 3779 3780 Level: developer 3781 3782 Notes: 3783 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3784 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3785 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3786 3787 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3788 3789 The format which is used for the sparse matrix input, is equivalent to a 3790 row-major ordering.. i.e for the following matrix, the input data expected is 3791 as shown: 3792 3793 1 0 0 3794 2 0 3 P0 3795 ------- 3796 4 5 6 P1 3797 3798 Process0 [P0]: rows_owned=[0,1] 3799 i = {0,1,3} [size = nrow+1 = 2+1] 3800 j = {0,0,2} [size = nz = 6] 3801 v = {1,2,3} [size = nz = 6] 3802 3803 Process1 [P1]: rows_owned=[2] 3804 i = {0,3} [size = nrow+1 = 1+1] 3805 j = {0,1,2} [size = nz = 6] 3806 v = {4,5,6} [size = nz = 6] 3807 3808 .keywords: matrix, aij, compressed row, sparse, parallel 3809 3810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3811 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3812 @*/ 3813 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3814 { 3815 PetscErrorCode ierr; 3816 3817 PetscFunctionBegin; 3818 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821 3822 #undef __FUNCT__ 3823 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3824 /*@C 3825 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3826 (the default parallel PETSc format). For good matrix assembly performance 3827 the user should preallocate the matrix storage by setting the parameters 3828 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3829 performance can be increased by more than a factor of 50. 3830 3831 Collective on MPI_Comm 3832 3833 Input Parameters: 3834 + A - the matrix 3835 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3836 (same value is used for all local rows) 3837 . d_nnz - array containing the number of nonzeros in the various rows of the 3838 DIAGONAL portion of the local submatrix (possibly different for each row) 3839 or NULL, if d_nz is used to specify the nonzero structure. 3840 The size of this array is equal to the number of local rows, i.e 'm'. 
3841 For matrices that will be factored, you must leave room for (and set) 3842 the diagonal entry even if it is zero. 3843 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3844 submatrix (same value is used for all local rows). 3845 - o_nnz - array containing the number of nonzeros in the various rows of the 3846 OFF-DIAGONAL portion of the local submatrix (possibly different for 3847 each row) or NULL, if o_nz is used to specify the nonzero 3848 structure. The size of this array is equal to the number 3849 of local rows, i.e 'm'. 3850 3851 If the *_nnz parameter is given then the *_nz parameter is ignored 3852 3853 The AIJ format (also called the Yale sparse matrix format or 3854 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3855 storage. The stored row and column indices begin with zero. 3856 See Users-Manual: ch_mat for details. 3857 3858 The parallel matrix is partitioned such that the first m0 rows belong to 3859 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3860 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3861 3862 The DIAGONAL portion of the local submatrix of a processor can be defined 3863 as the submatrix which is obtained by extraction the part corresponding to 3864 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3865 first row that belongs to the processor, r2 is the last row belonging to 3866 the this processor, and c1-c2 is range of indices of the local part of a 3867 vector suitable for applying the matrix to. This is an mxn matrix. In the 3868 common case of a square matrix, the row and column ranges are the same and 3869 the DIAGONAL part is also square. The remaining portion of the local 3870 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3871 3872 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3873 3874 You can call MatGetInfo() to get information on how effective the preallocation was; 3875 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3876 You can also run with the option -info and look for messages with the string 3877 malloc in them to see if additional memory allocation was needed. 3878 3879 Example usage: 3880 3881 Consider the following 8x8 matrix with 34 non-zero values, that is 3882 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3883 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3884 as follows: 3885 3886 .vb 3887 1 2 0 | 0 3 0 | 0 4 3888 Proc0 0 5 6 | 7 0 0 | 8 0 3889 9 0 10 | 11 0 0 | 12 0 3890 ------------------------------------- 3891 13 0 14 | 15 16 17 | 0 0 3892 Proc1 0 18 0 | 19 20 21 | 0 0 3893 0 0 0 | 22 23 0 | 24 0 3894 ------------------------------------- 3895 Proc2 25 26 27 | 0 0 28 | 29 0 3896 30 0 0 | 31 32 33 | 0 34 3897 .ve 3898 3899 This can be represented as a collection of submatrices as: 3900 3901 .vb 3902 A B C 3903 D E F 3904 G H I 3905 .ve 3906 3907 Where the submatrices A,B,C are owned by proc0, D,E,F are 3908 owned by proc1, G,H,I are owned by proc2. 3909 3910 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3911 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3912 The 'M','N' parameters are 8,8, and have the same values on all procs. 3913 3914 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3915 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3916 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
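   These two pieces are stored locally as two separate sequential matrices (as described next)
   and, once the matrix is assembled, can be retrieved with MatMPIAIJGetSeqAIJ(); a minimal
   sketch, where the names Ad, Ao and colmap are illustrative:
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve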
3917 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3918 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3919 matrix, and [DF] as another SeqAIJ matrix. 3920 3921 When d_nz, o_nz parameters are specified, d_nz storage elements are 3922 allocated for every row of the local diagonal submatrix, and o_nz 3923 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3924 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3925 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3926 In this case, the values of d_nz,o_nz are: 3927 .vb 3928 proc0 : d_nz = 2, o_nz = 2 3929 proc1 : d_nz = 3, o_nz = 2 3930 proc2 : d_nz = 1, o_nz = 4 3931 .ve 3932 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3933 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3934 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3935 34 values. 3936 3937 When d_nnz, o_nnz parameters are specified, the storage is specified 3938 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3939 In the above case the values for d_nnz,o_nnz are: 3940 .vb 3941 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3942 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3943 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3944 .ve 3945 Here the space allocated is the sum of all the above values, i.e. 34, and 3946 hence the preallocation is perfect. 3947 3948 Level: intermediate 3949 3950 .keywords: matrix, aij, compressed row, sparse, parallel 3951 3952 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3953 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3954 @*/ 3955 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3956 { 3957 PetscErrorCode ierr; 3958 3959 PetscFunctionBegin; 3960 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3961 PetscValidType(B,1); 3962 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 #undef __FUNCT__ 3967 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3968 /*@ 3969 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3970 in standard CSR format. 3971 3972 Collective on MPI_Comm 3973 3974 Input Parameters: 3975 + comm - MPI communicator 3976 . m - number of local rows (Cannot be PETSC_DECIDE) 3977 . n - This value should be the same as the local size used in creating the 3978 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3979 calculated if N is given) For square matrices n is almost always m. 3980 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3981 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3982 . i - row indices 3983 . j - column indices 3984 - a - matrix values 3985 3986 Output Parameter: 3987 . mat - the matrix 3988 3989 Level: intermediate 3990 3991 Notes: 3992 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3993 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3994 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3995 3996 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
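   A minimal calling sketch (illustrative; m, n and the CSR arrays i, j, a describe only the
   locally owned rows, laid out as in the example below):
.vb
      Mat A;
      MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve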
3997 3998 The format which is used for the sparse matrix input, is equivalent to a 3999 row-major ordering.. i.e for the following matrix, the input data expected is 4000 as shown: 4001 4002 1 0 0 4003 2 0 3 P0 4004 ------- 4005 4 5 6 P1 4006 4007 Process0 [P0]: rows_owned=[0,1] 4008 i = {0,1,3} [size = nrow+1 = 2+1] 4009 j = {0,0,2} [size = nz = 6] 4010 v = {1,2,3} [size = nz = 6] 4011 4012 Process1 [P1]: rows_owned=[2] 4013 i = {0,3} [size = nrow+1 = 1+1] 4014 j = {0,1,2} [size = nz = 6] 4015 v = {4,5,6} [size = nz = 6] 4016 4017 .keywords: matrix, aij, compressed row, sparse, parallel 4018 4019 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4020 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4021 @*/ 4022 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4023 { 4024 PetscErrorCode ierr; 4025 4026 PetscFunctionBegin; 4027 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4028 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4029 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4030 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4031 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4032 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4033 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4034 PetscFunctionReturn(0); 4035 } 4036 4037 #undef __FUNCT__ 4038 #define __FUNCT__ "MatCreateAIJ" 4039 /*@C 4040 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4041 (the default parallel PETSc format). For good matrix assembly performance 4042 the user should preallocate the matrix storage by setting the parameters 4043 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4044 performance can be increased by more than a factor of 50. 4045 4046 Collective on MPI_Comm 4047 4048 Input Parameters: 4049 + comm - MPI communicator 4050 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4051 This value should be the same as the local size used in creating the 4052 y vector for the matrix-vector product y = Ax. 4053 . n - This value should be the same as the local size used in creating the 4054 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4055 calculated if N is given) For square matrices n is almost always m. 4056 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4057 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4058 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4059 (same value is used for all local rows) 4060 . d_nnz - array containing the number of nonzeros in the various rows of the 4061 DIAGONAL portion of the local submatrix (possibly different for each row) 4062 or NULL, if d_nz is used to specify the nonzero structure. 4063 The size of this array is equal to the number of local rows, i.e 'm'. 4064 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4065 submatrix (same value is used for all local rows). 4066 - o_nnz - array containing the number of nonzeros in the various rows of the 4067 OFF-DIAGONAL portion of the local submatrix (possibly different for 4068 each row) or NULL, if o_nz is used to specify the nonzero 4069 structure. 
The size of this array is equal to the number 4070 of local rows, i.e. 'm'. 4071 4072 Output Parameter: 4073 . A - the matrix 4074 4075 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4076 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4077 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4078 4079 Notes: 4080 If the *_nnz parameter is given then the *_nz parameter is ignored. 4081 4082 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4083 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4084 storage requirements for this matrix. 4085 4086 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4087 processor then it must be used on all processors that share the object for 4088 that argument. 4089 4090 The user MUST specify either the local or global matrix dimensions 4091 (possibly both). 4092 4093 The parallel matrix is partitioned across processors such that the 4094 first m0 rows belong to process 0, the next m1 rows belong to 4095 process 1, the next m2 rows belong to process 2, etc., where 4096 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4097 values corresponding to an [m x N] submatrix. 4098 4099 The columns are logically partitioned with the n0 columns belonging 4100 to the 0th partition, the next n1 columns belonging to the next 4101 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4102 4103 The DIAGONAL portion of the local submatrix on any given processor 4104 is the submatrix corresponding to the rows and columns m,n 4105 corresponding to the given processor, i.e. the diagonal matrix on 4106 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4107 etc. The remaining portion of the local submatrix [m x (N-n)] 4108 constitutes the OFF-DIAGONAL portion. The example below better 4109 illustrates this concept. 4110 4111 For a square global matrix we define each processor's diagonal portion 4112 to be its local rows and the corresponding columns (a square submatrix); 4113 each processor's off-diagonal portion encompasses the remainder of the 4114 local matrix (a rectangular submatrix). 4115 4116 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4117 4118 When calling this routine with a single process communicator, a matrix of 4119 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4120 type of communicator, use the construction mechanism: 4121 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4122 4123 By default, this format uses inodes (identical nodes) when possible. 4124 We search for consecutive rows with the same nonzero structure, thereby 4125 reusing matrix information to achieve increased efficiency. 4126 4127 Options Database Keys: 4128 + -mat_no_inode - Do not use inodes 4129 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4130 - -mat_aij_oneindex - Internally use indexing starting at 1 4131 rather than 0. Note that when calling MatSetValues(), 4132 the user still MUST index entries starting at 0! 4133 4134 4135 Example usage: 4136 4137 Consider the following 8x8 matrix with 34 non-zero values, that is 4138 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4139 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4140 as follows: 4141 4142 .vb 4143 1 2 0 | 0 3 0 | 0 4 4144 Proc0 0 5 6 | 7 0 0 | 8 0 4145 9 0 10 | 11 0 0 | 12 0 4146 ------------------------------------- 4147 13 0 14 | 15 16 17 | 0 0 4148 Proc1 0 18 0 | 19 20 21 | 0 0 4149 0 0 0 | 22 23 0 | 24 0 4150 ------------------------------------- 4151 Proc2 25 26 27 | 0 0 28 | 29 0 4152 30 0 0 | 31 32 33 | 0 34 4153 .ve 4154 4155 This can be represented as a collection of submatrices as: 4156 4157 .vb 4158 A B C 4159 D E F 4160 G H I 4161 .ve 4162 4163 Where the submatrices A,B,C are owned by proc0, D,E,F are 4164 owned by proc1, G,H,I are owned by proc2. 4165 4166 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4167 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4168 The 'M','N' parameters are 8,8, and have the same values on all procs. 4169 4170 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4171 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4172 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4173 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4174 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4175 matrix, ans [DF] as another SeqAIJ matrix. 4176 4177 When d_nz, o_nz parameters are specified, d_nz storage elements are 4178 allocated for every row of the local diagonal submatrix, and o_nz 4179 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4180 One way to choose d_nz and o_nz is to use the max nonzerors per local 4181 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4182 In this case, the values of d_nz,o_nz are: 4183 .vb 4184 proc0 : dnz = 2, o_nz = 2 4185 proc1 : dnz = 3, o_nz = 2 4186 proc2 : dnz = 1, o_nz = 4 4187 .ve 4188 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4189 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4190 for proc3. i.e we are using 12+15+10=37 storage locations to store 4191 34 values. 4192 4193 When d_nnz, o_nnz parameters are specified, the storage is specified 4194 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4195 In the above case the values for d_nnz,o_nnz are: 4196 .vb 4197 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4198 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4199 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4200 .ve 4201 Here the space allocated is sum of all the above values i.e 34, and 4202 hence pre-allocation is perfect. 
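   A sketch of the recommended MatCreate()/MatSetType()/MatXXXXSetPreallocation() sequence
   mentioned above (variable names are illustrative; error checking omitted):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve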
4203 4204 Level: intermediate 4205 4206 .keywords: matrix, aij, compressed row, sparse, parallel 4207 4208 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4209 MPIAIJ, MatCreateMPIAIJWithArrays() 4210 @*/ 4211 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4212 { 4213 PetscErrorCode ierr; 4214 PetscMPIInt size; 4215 4216 PetscFunctionBegin; 4217 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4218 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4219 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4220 if (size > 1) { 4221 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4222 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4223 } else { 4224 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4225 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4226 } 4227 PetscFunctionReturn(0); 4228 } 4229 4230 #undef __FUNCT__ 4231 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4232 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4233 { 4234 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4235 4236 PetscFunctionBegin; 4237 *Ad = a->A; 4238 *Ao = a->B; 4239 *colmap = a->garray; 4240 PetscFunctionReturn(0); 4241 } 4242 4243 #undef __FUNCT__ 4244 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4245 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4246 { 4247 PetscErrorCode ierr; 4248 PetscInt i; 4249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4250 4251 PetscFunctionBegin; 4252 if (coloring->ctype == IS_COLORING_GLOBAL) { 4253 ISColoringValue *allcolors,*colors; 4254 ISColoring ocoloring; 4255 4256 /* set coloring for diagonal portion */ 4257 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4258 4259 /* set coloring for off-diagonal portion */ 4260 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4261 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4262 for (i=0; i<a->B->cmap->n; i++) { 4263 colors[i] = allcolors[a->garray[i]]; 4264 } 4265 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4266 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4267 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4268 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4269 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4270 ISColoringValue *colors; 4271 PetscInt *larray; 4272 ISColoring ocoloring; 4273 4274 /* set coloring for diagonal portion */ 4275 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4276 for (i=0; i<a->A->cmap->n; i++) { 4277 larray[i] = i + A->cmap->rstart; 4278 } 4279 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4280 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4281 for (i=0; i<a->A->cmap->n; i++) { 4282 colors[i] = coloring->colors[larray[i]]; 4283 } 4284 ierr = PetscFree(larray);CHKERRQ(ierr); 4285 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4286 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4287 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4288 4289 /* set coloring for off-diagonal portion */ 4290 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4291 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4292 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4293 for (i=0; i<a->B->cmap->n; i++) { 4294 colors[i] = coloring->colors[larray[i]]; 4295 } 4296 ierr = PetscFree(larray);CHKERRQ(ierr); 4297 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4298 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4299 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4300 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4301 PetscFunctionReturn(0); 4302 } 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4306 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4307 { 4308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4309 PetscErrorCode ierr; 4310 4311 PetscFunctionBegin; 4312 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4313 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4319 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4320 { 4321 PetscErrorCode ierr; 4322 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4323 PetscInt *indx; 4324 4325 PetscFunctionBegin; 4326 /* This routine will ONLY return MPIAIJ type matrix */ 4327 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4328 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4329 if (n == PETSC_DECIDE) { 4330 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4331 } 4332 /* Check sum(n) = N */ 4333 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4334 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4335 4336 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4337 rstart -= m; 4338 4339 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4340 for (i=0; i<m; i++) { 4341 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4342 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4343 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4344 } 4345 4346 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4347 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4348 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4349 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4350 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4351 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4352 PetscFunctionReturn(0); 4353 } 4354 4355 #undef __FUNCT__ 4356 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4357 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4358 { 4359 PetscErrorCode ierr; 4360 PetscInt m,N,i,rstart,nnz,Ii; 4361 PetscInt *indx; 4362 PetscScalar *values; 4363 4364 PetscFunctionBegin; 4365 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4366 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4367 for (i=0; i<m; i++) { 4368 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4369 Ii = i + rstart; 4370 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4371 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4372 } 4373 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4374 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4375 PetscFunctionReturn(0); 4376 } 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4380 /*@ 4381 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4382 matrices from each processor 4383 4384 Collective on MPI_Comm 4385 4386 Input Parameters: 4387 + comm - the communicators the parallel matrix will live on 4388 . inmat - the input sequential matrices 4389 . n - number of local columns (or PETSC_DECIDE) 4390 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4391 4392 Output Parameter: 4393 . outmat - the parallel matrix generated 4394 4395 Level: advanced 4396 4397 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4398 4399 @*/ 4400 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4401 { 4402 PetscErrorCode ierr; 4403 PetscMPIInt size; 4404 4405 PetscFunctionBegin; 4406 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4407 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4408 if (size == 1) { 4409 if (scall == MAT_INITIAL_MATRIX) { 4410 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4411 } else { 4412 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4413 } 4414 } else { 4415 if (scall == MAT_INITIAL_MATRIX) { 4416 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4417 } 4418 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4419 } 4420 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4421 PetscFunctionReturn(0); 4422 } 4423 4424 #undef __FUNCT__ 4425 #define __FUNCT__ "MatFileSplit" 4426 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4427 { 4428 PetscErrorCode ierr; 4429 PetscMPIInt rank; 4430 PetscInt m,N,i,rstart,nnz; 4431 size_t len; 4432 const PetscInt *indx; 4433 PetscViewer out; 4434 char *name; 4435 Mat B; 4436 const PetscScalar *values; 4437 4438 PetscFunctionBegin; 4439 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4440 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4441 /* Should this be the type of the diagonal block of A? 
*/ 4442 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4443 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4444 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4445 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4446 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4447 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4448 for (i=0; i<m; i++) { 4449 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4450 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4451 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4452 } 4453 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4454 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4455 4456 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4457 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4458 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4459 sprintf(name,"%s.%d",outfile,rank); 4460 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4461 ierr = PetscFree(name);CHKERRQ(ierr); 4462 ierr = MatView(B,out);CHKERRQ(ierr); 4463 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4464 ierr = MatDestroy(&B);CHKERRQ(ierr); 4465 PetscFunctionReturn(0); 4466 } 4467 4468 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4469 #undef __FUNCT__ 4470 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4471 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4472 { 4473 PetscErrorCode ierr; 4474 Mat_Merge_SeqsToMPI *merge; 4475 PetscContainer container; 4476 4477 PetscFunctionBegin; 4478 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4479 if (container) { 4480 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4493 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4494 ierr = PetscFree(merge);CHKERRQ(ierr); 4495 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4496 } 4497 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4498 PetscFunctionReturn(0); 4499 } 4500 4501 #include <../src/mat/utils/freespace.h> 4502 #include <petscbt.h> 4503 4504 #undef __FUNCT__ 4505 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4506 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4507 { 4508 PetscErrorCode ierr; 4509 MPI_Comm comm; 4510 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4511 PetscMPIInt size,rank,taga,*len_s; 4512 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4513 PetscInt proc,m; 4514 PetscInt **buf_ri,**buf_rj; 4515 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4516 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4517 MPI_Request *s_waits,*r_waits; 4518 MPI_Status *status; 4519 MatScalar *aa=a->a; 4520 MatScalar **abuf_r,*ba_i; 4521 Mat_Merge_SeqsToMPI *merge; 4522 PetscContainer container; 4523 4524 PetscFunctionBegin; 4525 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4526 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4527 4528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4529 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4530 4531 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4532 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4533 4534 bi = merge->bi; 4535 bj = merge->bj; 4536 buf_ri = merge->buf_ri; 4537 buf_rj = merge->buf_rj; 4538 4539 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4540 owners = merge->rowmap->range; 4541 len_s = merge->len_s; 4542 4543 /* send and recv matrix values */ 4544 /*-----------------------------*/ 4545 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4546 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4547 4548 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4549 for (proc=0,k=0; proc<size; proc++) { 4550 if (!len_s[proc]) continue; 4551 i = owners[proc]; 4552 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4553 k++; 4554 } 4555 4556 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4557 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4558 ierr = PetscFree(status);CHKERRQ(ierr); 4559 4560 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4561 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4562 4563 /* insert mat values of mpimat */ 4564 /*----------------------------*/ 4565 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4566 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4567 4568 for (k=0; k<merge->nrecv; k++) { 4569 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4570 nrows = *(buf_ri_k[k]); 4571 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4572 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4573 } 4574 4575 /* set values of ba */ 4576 m = merge->rowmap->n; 4577 for (i=0; i<m; i++) { 4578 arow = owners[rank] + i; 4579 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4580 bnzi = bi[i+1] - bi[i]; 4581 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4582 4583 /* add local non-zero vals of this proc's seqmat into ba */ 4584 anzi = ai[arow+1] - ai[arow]; 4585 aj = a->j + ai[arow]; 4586 aa = a->a + ai[arow]; 4587 nextaj = 0; 4588 for (j=0; nextaj<anzi; j++) { 4589 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4590 ba_i[j] += aa[nextaj++]; 4591 } 4592 } 4593 4594 /* add received vals into ba */ 4595 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4596 /* i-th row */ 4597 if (i == *nextrow[k]) { 4598 anzi = *(nextai[k]+1) - *nextai[k]; 4599 aj = buf_rj[k] + *(nextai[k]); 4600 aa = abuf_r[k] + *(nextai[k]); 4601 nextaj = 0; 4602 for (j=0; nextaj<anzi; j++) { 4603 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4604 ba_i[j] += aa[nextaj++]; 4605 } 4606 } 4607 nextrow[k]++; nextai[k]++; 4608 } 4609 } 4610 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4611 } 4612 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4613 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4614 4615 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4616 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4617 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4618 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4619 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4620 PetscFunctionReturn(0); 4621 } 4622 4623 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4624 4625 #undef __FUNCT__ 4626 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4627 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4628 { 4629 PetscErrorCode ierr; 4630 Mat B_mpi; 4631 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4632 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4633 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4634 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4635 PetscInt len,proc,*dnz,*onz,bs,cbs; 4636 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4637 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4638 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4639 MPI_Status *status; 4640 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4641 PetscBT lnkbt; 4642 Mat_Merge_SeqsToMPI *merge; 4643 PetscContainer container; 4644 4645 PetscFunctionBegin; 4646 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4647 4648 /* make sure it is a PETSc comm */ 4649 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4650 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4651 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4652 4653 ierr = PetscNew(&merge);CHKERRQ(ierr); 4654 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4655 4656 /* determine row ownership */ 4657 /*---------------------------------------------------------*/ 4658 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4659 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4660 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4661 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4662 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4663 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4664 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4665 4666 m = merge->rowmap->n; 4667 owners = merge->rowmap->range; 4668 4669 /* determine the number of messages to send, their lengths */ 4670 /*---------------------------------------------------------*/ 4671 len_s = merge->len_s; 4672 4673 len = 0; /* length of buf_si[] */ 4674 merge->nsend = 0; 4675 for (proc=0; proc<size; proc++) { 4676 len_si[proc] = 0; 4677 if (proc == rank) { 4678 len_s[proc] = 0; 4679 } else { 4680 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4681 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4682 } 4683 if (len_s[proc]) { 4684 merge->nsend++; 4685 nrows = 0; 4686 for (i=owners[proc]; i<owners[proc+1]; i++) { 4687 if (ai[i+1] > ai[i]) nrows++; 4688 } 4689 len_si[proc] = 2*(nrows+1); 4690 len += len_si[proc]; 4691 } 4692 } 4693 4694 /* determine the number and length of messages to receive for ij-structure */ 4695 /*-------------------------------------------------------------------------*/ 4696 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4697 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4698 4699 /* post the Irecv of j-structure */ 4700 /*-------------------------------*/ 4701 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4702 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4703 4704 /* post the Isend of j-structure */ 4705 /*--------------------------------*/ 4706 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4707 4708 for (proc=0, k=0; proc<size; proc++) { 4709 if (!len_s[proc]) continue; 4710 i = owners[proc]; 4711 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4712 k++; 4713 } 4714 4715 /* receives and sends of j-structure are complete */ 4716 /*------------------------------------------------*/ 4717 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4718 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4719 4720 /* send and recv i-structure */ 4721 /*---------------------------*/ 4722 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4723 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4724 4725 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4726 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4727 for (proc=0,k=0; proc<size; proc++) { 4728 if (!len_s[proc]) continue; 4729 /* form outgoing message for i-structure: 4730 buf_si[0]: nrows to be sent 4731 [1:nrows]: row index (global) 4732 [nrows+1:2*nrows+1]: i-structure index 4733 */ 4734 /*-------------------------------------------*/ 4735 nrows = len_si[proc]/2 - 1; 4736 buf_si_i = buf_si + nrows+1; 4737 buf_si[0] = nrows; 4738 buf_si_i[0] = 0; 4739 nrows = 0; 4740 for (i=owners[proc]; i<owners[proc+1]; i++) { 4741 anzi = ai[i+1] - ai[i]; 4742 if (anzi) { 4743 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4744 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4745 nrows++; 4746 } 4747 } 4748 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4749 k++; 4750 buf_si += len_si[proc]; 4751 } 4752 4753 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4754 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4755 4756 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4757 for (i=0; i<merge->nrecv; i++) { 4758 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4759 } 4760 4761 ierr = PetscFree(len_si);CHKERRQ(ierr); 4762 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4763 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4764 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4765 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4766 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4767 ierr = PetscFree(status);CHKERRQ(ierr); 4768 4769 /* compute a local seq matrix in each processor */ 4770 /*----------------------------------------------*/ 4771 /* allocate bi array and free space for accumulating nonzero column info */ 4772 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4773 bi[0] = 0; 4774 4775 /* create and initialize a linked list */ 4776 nlnk = N+1; 4777 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4778 4779 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4780 len = ai[owners[rank+1]] - ai[owners[rank]]; 4781 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4782 4783 current_space = free_space; 4784 4785 /* determine symbolic info for each local row */ 4786 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4787
4788 for (k=0; k<merge->nrecv; k++) {
4789 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4790 nrows = *buf_ri_k[k];
4791 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4792 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4793 }
4794
4795 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4796 len = 0;
4797 for (i=0; i<m; i++) {
4798 bnzi = 0;
4799 /* add local non-zero cols of this proc's seqmat into lnk */
4800 arow = owners[rank] + i;
4801 anzi = ai[arow+1] - ai[arow];
4802 aj = a->j + ai[arow];
4803 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4804 bnzi += nlnk;
4805 /* add received col data into lnk */
4806 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4807 if (i == *nextrow[k]) { /* i-th row */
4808 anzi = *(nextai[k]+1) - *nextai[k];
4809 aj = buf_rj[k] + *nextai[k];
4810 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4811 bnzi += nlnk;
4812 nextrow[k]++; nextai[k]++;
4813 }
4814 }
4815 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4816
4817 /* if free space is not available, make more free space */
4818 if (current_space->local_remaining<bnzi) {
4819 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4820 nspacedouble++;
4821 }
4822 /* copy data into free space, then initialize lnk */
4823 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4824 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4825
4826 current_space->array += bnzi;
4827 current_space->local_used += bnzi;
4828 current_space->local_remaining -= bnzi;
4829
4830 bi[i+1] = bi[i] + bnzi;
4831 }
4832
4833 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4834
4835 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4836 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4837 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4838
4839 /* create symbolic parallel matrix B_mpi */
4840 /*---------------------------------------*/
4841 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4842 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4843 if (n==PETSC_DECIDE) {
4844 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4845 } else {
4846 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4847 }
4848 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4849 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4850 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4851 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4852 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4853
4854 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4855 B_mpi->assembled = PETSC_FALSE;
4856 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4857 merge->bi = bi;
4858 merge->bj = bj;
4859 merge->buf_ri = buf_ri;
4860 merge->buf_rj = buf_rj;
4861 merge->coi = NULL;
4862 merge->coj = NULL;
4863 merge->owners_co = NULL;
4864
4865 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4866
4867 /* attach the supporting struct to B_mpi for reuse */
4868 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4869 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4870 ierr =
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4871 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4872 *mpimat = B_mpi; 4873 4874 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4875 PetscFunctionReturn(0); 4876 } 4877 4878 #undef __FUNCT__ 4879 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4880 /*@C 4881 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4882 matrices from each processor 4883 4884 Collective on MPI_Comm 4885 4886 Input Parameters: 4887 + comm - the communicators the parallel matrix will live on 4888 . seqmat - the input sequential matrices 4889 . m - number of local rows (or PETSC_DECIDE) 4890 . n - number of local columns (or PETSC_DECIDE) 4891 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4892 4893 Output Parameter: 4894 . mpimat - the parallel matrix generated 4895 4896 Level: advanced 4897 4898 Notes: 4899 The dimensions of the sequential matrix in each processor MUST be the same. 4900 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4901 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4902 @*/ 4903 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4904 { 4905 PetscErrorCode ierr; 4906 PetscMPIInt size; 4907 4908 PetscFunctionBegin; 4909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4910 if (size == 1) { 4911 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4912 if (scall == MAT_INITIAL_MATRIX) { 4913 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4914 } else { 4915 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4916 } 4917 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4918 PetscFunctionReturn(0); 4919 } 4920 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4921 if (scall == MAT_INITIAL_MATRIX) { 4922 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4923 } 4924 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4925 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4926 PetscFunctionReturn(0); 4927 } 4928 4929 #undef __FUNCT__ 4930 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4931 /*@ 4932 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4933 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4934 with MatGetSize() 4935 4936 Not Collective 4937 4938 Input Parameters: 4939 + A - the matrix 4940 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4941 4942 Output Parameter: 4943 . 
A_loc - the local sequential matrix generated 4944 4945 Level: developer 4946 4947 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4948 4949 @*/ 4950 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4951 { 4952 PetscErrorCode ierr; 4953 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4954 Mat_SeqAIJ *mat,*a,*b; 4955 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4956 MatScalar *aa,*ba,*cam; 4957 PetscScalar *ca; 4958 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4959 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4960 PetscBool match; 4961 4962 PetscFunctionBegin; 4963 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4964 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4965 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4966 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4967 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4968 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4969 aa = a->a; ba = b->a; 4970 if (scall == MAT_INITIAL_MATRIX) { 4971 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4972 ci[0] = 0; 4973 for (i=0; i<am; i++) { 4974 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4975 } 4976 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4977 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4978 k = 0; 4979 for (i=0; i<am; i++) { 4980 ncols_o = bi[i+1] - bi[i]; 4981 ncols_d = ai[i+1] - ai[i]; 4982 /* off-diagonal portion of A */ 4983 for (jo=0; jo<ncols_o; jo++) { 4984 col = cmap[*bj]; 4985 if (col >= cstart) break; 4986 cj[k] = col; bj++; 4987 ca[k++] = *ba++; 4988 } 4989 /* diagonal portion of A */ 4990 for (j=0; j<ncols_d; j++) { 4991 cj[k] = cstart + *aj++; 4992 ca[k++] = *aa++; 4993 } 4994 /* off-diagonal portion of A */ 4995 for (j=jo; j<ncols_o; j++) { 4996 cj[k] = cmap[*bj++]; 4997 ca[k++] = *ba++; 4998 } 4999 } 5000 /* put together the new matrix */ 5001 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5002 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5003 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5004 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5005 mat->free_a = PETSC_TRUE; 5006 mat->free_ij = PETSC_TRUE; 5007 mat->nonew = 0; 5008 } else if (scall == MAT_REUSE_MATRIX) { 5009 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5010 ci = mat->i; cj = mat->j; cam = mat->a; 5011 for (i=0; i<am; i++) { 5012 /* off-diagonal portion of A */ 5013 ncols_o = bi[i+1] - bi[i]; 5014 for (jo=0; jo<ncols_o; jo++) { 5015 col = cmap[*bj]; 5016 if (col >= cstart) break; 5017 *cam++ = *ba++; bj++; 5018 } 5019 /* diagonal portion of A */ 5020 ncols_d = ai[i+1] - ai[i]; 5021 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5022 /* off-diagonal portion of A */ 5023 for (j=jo; j<ncols_o; j++) { 5024 *cam++ = *ba++; bj++; 5025 } 5026 } 5027 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5028 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5029 PetscFunctionReturn(0); 5030 } 5031 5032 #undef __FUNCT__ 5033 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5034 /*@C 5035 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5036 5037 Not Collective 5038 5039 Input Parameters: 5040 + A - the matrix 5041 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5042 - row, col - index sets of rows and columns to extract (or NULL) 5043 5044 Output Parameter: 5045 . 
A_loc - the local sequential matrix generated 5046 5047 Level: developer 5048 5049 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5050 5051 @*/ 5052 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5053 { 5054 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5055 PetscErrorCode ierr; 5056 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5057 IS isrowa,iscola; 5058 Mat *aloc; 5059 PetscBool match; 5060 5061 PetscFunctionBegin; 5062 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5063 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5064 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5065 if (!row) { 5066 start = A->rmap->rstart; end = A->rmap->rend; 5067 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5068 } else { 5069 isrowa = *row; 5070 } 5071 if (!col) { 5072 start = A->cmap->rstart; 5073 cmap = a->garray; 5074 nzA = a->A->cmap->n; 5075 nzB = a->B->cmap->n; 5076 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5077 ncols = 0; 5078 for (i=0; i<nzB; i++) { 5079 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5080 else break; 5081 } 5082 imark = i; 5083 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5084 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5085 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5086 } else { 5087 iscola = *col; 5088 } 5089 if (scall != MAT_INITIAL_MATRIX) { 5090 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5091 aloc[0] = *A_loc; 5092 } 5093 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5094 *A_loc = aloc[0]; 5095 ierr = PetscFree(aloc);CHKERRQ(ierr); 5096 if (!row) { 5097 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5098 } 5099 if (!col) { 5100 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5101 } 5102 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5103 PetscFunctionReturn(0); 5104 } 5105 5106 #undef __FUNCT__ 5107 #define __FUNCT__ "MatGetBrowsOfAcols" 5108 /*@C 5109 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5110 5111 Collective on Mat 5112 5113 Input Parameters: 5114 + A,B - the matrices in mpiaij format 5115 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5116 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5117 5118 Output Parameter: 5119 + rowb, colb - index sets of rows and columns of B to extract 5120 - B_seq - the sequential matrix generated 5121 5122 Level: developer 5123 5124 @*/ 5125 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5126 { 5127 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5128 PetscErrorCode ierr; 5129 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5130 IS isrowb,iscolb; 5131 Mat *bseq=NULL; 5132 5133 PetscFunctionBegin; 5134 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5135 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5136 } 5137 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5138 5139 if (scall == MAT_INITIAL_MATRIX) { 5140 start = A->cmap->rstart; 5141 cmap = a->garray; 5142 nzA = a->A->cmap->n; 5143 nzB = a->B->cmap->n; 5144 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5145 ncols = 0; 5146 for (i=0; i<nzB; i++) { /* row < local row index */ 5147 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5148 else break; 5149 } 5150 imark = i; 5151 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5152 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5153 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5154 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5155 } else { 5156 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5157 isrowb = *rowb; iscolb = *colb; 5158 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5159 bseq[0] = *B_seq; 5160 } 5161 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5162 *B_seq = bseq[0]; 5163 ierr = PetscFree(bseq);CHKERRQ(ierr); 5164 if (!rowb) { 5165 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5166 } else { 5167 *rowb = isrowb; 5168 } 5169 if (!colb) { 5170 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5171 } else { 5172 *colb = iscolb; 5173 } 5174 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5175 PetscFunctionReturn(0); 5176 } 5177 5178 #undef __FUNCT__ 5179 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5180 /* 5181 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5182 of the OFF-DIAGONAL portion of local A 5183 5184 Collective on Mat 5185 5186 Input Parameters: 5187 + A,B - the matrices in mpiaij format 5188 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5189 5190 Output Parameter: 5191 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5192 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5193 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5194 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5195 5196 Level: developer 5197 5198 */ 5199 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5200 { 5201 VecScatter_MPI_General *gen_to,*gen_from; 5202 PetscErrorCode ierr; 5203 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5204 Mat_SeqAIJ *b_oth; 5205 VecScatter ctx =a->Mvctx; 5206 MPI_Comm comm; 5207 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5208 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5209 PetscScalar *rvalues,*svalues; 5210 MatScalar *b_otha,*bufa,*bufA; 5211 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5212 MPI_Request *rwaits = NULL,*swaits = NULL; 5213 MPI_Status *sstatus,rstatus; 5214 PetscMPIInt jj; 5215 PetscInt *cols,sbs,rbs; 5216 PetscScalar *vals; 5217 5218 PetscFunctionBegin; 5219 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5220 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5221 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5222 } 5223 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5224 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5225 5226 gen_to = (VecScatter_MPI_General*)ctx->todata; 5227 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5228 rvalues = gen_from->values; /* holds the length of receiving row */ 5229 svalues = gen_to->values; /* holds the length of sending row */ 5230 nrecvs = gen_from->n; 5231 nsends = gen_to->n; 5232 5233 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5234 srow = gen_to->indices; /* local row index to be sent */ 5235 sstarts = gen_to->starts; 5236 sprocs = gen_to->procs; 5237 sstatus = gen_to->sstatus; 5238 sbs = gen_to->bs; 5239 rstarts = gen_from->starts; 5240 rprocs = gen_from->procs; 5241 rbs = gen_from->bs; 5242 5243 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5244 if (scall == MAT_INITIAL_MATRIX) { 5245 /* i-array */ 5246 /*---------*/ 5247 /* post receives */ 5248 for (i=0; i<nrecvs; i++) { 5249 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5250 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5251 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5252 } 5253 5254 /* pack the outgoing message */ 5255 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5256 5257 sstartsj[0] = 0; 5258 rstartsj[0] = 0; 5259 len = 0; /* total length of j or a array to be sent */ 5260 k = 0; 5261 for (i=0; i<nsends; i++) { 5262 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5263 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5264 for (j=0; j<nrows; j++) { 5265 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5266 for (l=0; l<sbs; l++) { 5267 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5268 5269 rowlen[j*sbs+l] = ncols; 5270 5271 len += ncols; 5272 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5273 } 5274 k++; 5275 } 5276 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5277 5278 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5279 } 5280 /* recvs and 
sends of i-array are completed */ 5281 i = nrecvs; 5282 while (i--) { 5283 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5284 } 5285 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5286 5287 /* allocate buffers for sending j and a arrays */ 5288 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5289 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5290 5291 /* create i-array of B_oth */ 5292 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5293 5294 b_othi[0] = 0; 5295 len = 0; /* total length of j or a array to be received */ 5296 k = 0; 5297 for (i=0; i<nrecvs; i++) { 5298 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5299 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5300 for (j=0; j<nrows; j++) { 5301 b_othi[k+1] = b_othi[k] + rowlen[j]; 5302 len += rowlen[j]; k++; 5303 } 5304 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5305 } 5306 5307 /* allocate space for j and a arrrays of B_oth */ 5308 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5309 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5310 5311 /* j-array */ 5312 /*---------*/ 5313 /* post receives of j-array */ 5314 for (i=0; i<nrecvs; i++) { 5315 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5316 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5317 } 5318 5319 /* pack the outgoing message j-array */ 5320 k = 0; 5321 for (i=0; i<nsends; i++) { 5322 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5323 bufJ = bufj+sstartsj[i]; 5324 for (j=0; j<nrows; j++) { 5325 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5326 for (ll=0; ll<sbs; ll++) { 5327 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5328 for (l=0; l<ncols; l++) { 5329 *bufJ++ = cols[l]; 5330 } 5331 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5332 } 5333 } 5334 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5335 } 5336 5337 /* recvs and sends of j-array are completed */ 5338 i = nrecvs; 5339 while (i--) { 5340 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5341 } 5342 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5343 } else if (scall == MAT_REUSE_MATRIX) { 5344 sstartsj = *startsj_s; 5345 rstartsj = *startsj_r; 5346 bufa = *bufa_ptr; 5347 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5348 b_otha = b_oth->a; 5349 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5350 5351 /* a-array */ 5352 /*---------*/ 5353 /* post receives of a-array */ 5354 for (i=0; i<nrecvs; i++) { 5355 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5356 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5357 } 5358 5359 /* pack the outgoing message a-array */ 5360 k = 0; 5361 for (i=0; i<nsends; i++) { 5362 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5363 bufA = bufa+sstartsj[i]; 5364 for (j=0; j<nrows; j++) { 5365 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5366 for (ll=0; ll<sbs; ll++) { 5367 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5368 for (l=0; l<ncols; l++) { 5369 *bufA++ = vals[l]; 5370 } 5371 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5372 } 5373 } 5374 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5375 } 5376 /* recvs and sends of a-array are completed */ 5377 i = nrecvs; 5378 while (i--) { 5379 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5380 } 5381 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5382 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5383 5384 if (scall == MAT_INITIAL_MATRIX) { 5385 /* put together the new matrix */ 5386 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5387 5388 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5389 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5390 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5391 b_oth->free_a = PETSC_TRUE; 5392 b_oth->free_ij = PETSC_TRUE; 5393 b_oth->nonew = 0; 5394 5395 ierr = PetscFree(bufj);CHKERRQ(ierr); 5396 if (!startsj_s || !bufa_ptr) { 5397 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5398 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5399 } else { 5400 *startsj_s = sstartsj; 5401 *startsj_r = rstartsj; 5402 *bufa_ptr = bufa; 5403 } 5404 } 5405 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5406 PetscFunctionReturn(0); 5407 } 5408 5409 #undef __FUNCT__ 5410 #define __FUNCT__ "MatGetCommunicationStructs" 5411 /*@C 5412 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5413 5414 Not Collective 5415 5416 Input Parameters: 5417 . A - The matrix in mpiaij format 5418 5419 Output Parameter: 5420 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5421 . colmap - A map from global column index to local index into lvec 5422 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5423 5424 Level: developer 5425 5426 @*/ 5427 #if defined(PETSC_USE_CTABLE) 5428 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5429 #else 5430 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5431 #endif 5432 { 5433 Mat_MPIAIJ *a; 5434 5435 PetscFunctionBegin; 5436 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5437 PetscValidPointer(lvec, 2); 5438 PetscValidPointer(colmap, 3); 5439 PetscValidPointer(multScatter, 4); 5440 a = (Mat_MPIAIJ*) A->data; 5441 if (lvec) *lvec = a->lvec; 5442 if (colmap) *colmap = a->colmap; 5443 if (multScatter) *multScatter = a->Mvctx; 5444 PetscFunctionReturn(0); 5445 } 5446 5447 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5448 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5449 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5450 5451 #undef __FUNCT__ 5452 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5453 /* 5454 Computes (B'*A')' since computing B*A directly is untenable 5455 5456 n p p 5457 ( ) ( ) ( ) 5458 m ( A ) * n ( B ) = m ( C ) 5459 ( ) ( ) ( ) 5460 5461 */ 5462 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5463 { 5464 PetscErrorCode ierr; 5465 Mat At,Bt,Ct; 5466 5467 PetscFunctionBegin; 5468 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5469 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5470 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5471 ierr = MatDestroy(&At);CHKERRQ(ierr); 
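/* At and Bt were only intermediates; the MatTranspose() with MAT_REUSE_MATRIX below writes Ct' = (Bt*At)' = A*B into the already allocated C */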
5472 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5473 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5474 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5475 PetscFunctionReturn(0); 5476 } 5477 5478 #undef __FUNCT__ 5479 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5480 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5481 { 5482 PetscErrorCode ierr; 5483 PetscInt m=A->rmap->n,n=B->cmap->n; 5484 Mat Cmat; 5485 5486 PetscFunctionBegin; 5487 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5488 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5489 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5490 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5491 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5492 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5493 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5495 5496 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5497 5498 *C = Cmat; 5499 PetscFunctionReturn(0); 5500 } 5501 5502 /* ----------------------------------------------------------------*/ 5503 #undef __FUNCT__ 5504 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5505 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5506 { 5507 PetscErrorCode ierr; 5508 5509 PetscFunctionBegin; 5510 if (scall == MAT_INITIAL_MATRIX) { 5511 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5512 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5513 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5514 } 5515 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5516 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5517 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5518 PetscFunctionReturn(0); 5519 } 5520 5521 #if defined(PETSC_HAVE_MUMPS) 5522 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_PASTIX) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_SUPERLU_DIST) 5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5529 #endif 5530 #if defined(PETSC_HAVE_CLIQUE) 5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5532 #endif 5533 5534 /*MC 5535 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5536 5537 Options Database Keys: 5538 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5539 5540 Level: beginner 5541 5542 .seealso: MatCreateAIJ() 5543 M*/ 5544 5545 #undef __FUNCT__ 5546 #define __FUNCT__ "MatCreate_MPIAIJ" 5547 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5548 { 5549 Mat_MPIAIJ *b; 5550 PetscErrorCode ierr; 5551 PetscMPIInt size; 5552 5553 PetscFunctionBegin; 5554 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5555 5556 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5557 B->data = (void*)b; 5558 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5559 B->assembled = PETSC_FALSE; 5560 B->insertmode = NOT_SET_VALUES; 5561 b->size = size; 5562 5563 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5564 5565 /* build cache for off array entries formed */ 5566 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5567 5568 b->donotstash = PETSC_FALSE; 5569 b->colmap = 0; 5570 b->garray = 0; 5571 b->roworiented = PETSC_TRUE; 5572 5573 /* stuff used for matrix vector multiply */ 5574 b->lvec = NULL; 5575 b->Mvctx = NULL; 5576 5577 /* stuff for MatGetRow() */ 5578 b->rowindices = 0; 5579 b->rowvalues = 0; 5580 b->getrowactive = PETSC_FALSE; 5581 5582 /* flexible pointer used in CUSP/CUSPARSE classes */ 5583 b->spptr = NULL; 5584 5585 #if defined(PETSC_HAVE_MUMPS) 5586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_PASTIX) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5590 #endif 5591 #if defined(PETSC_HAVE_SUPERLU_DIST) 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5593 #endif 5594 #if defined(PETSC_HAVE_CLIQUE) 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5596 #endif 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5608 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5611 PetscFunctionReturn(0); 5612 } 5613 5614 #undef __FUNCT__ 5615 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5616 /*@ 5617 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5618 and "off-diagonal" part of the matrix in CSR format. 5619 5620 Collective on MPI_Comm 5621 5622 Input Parameters: 5623 + comm - MPI communicator 5624 . m - number of local rows (Cannot be PETSC_DECIDE) 5625 . n - This value should be the same as the local size used in creating the 5626 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5627 calculated if N is given) For square matrices n is almost always m. 5628 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5629 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5630 . i - row indices for "diagonal" portion of matrix 5631 . j - column indices 5632 . a - matrix values 5633 . oi - row indices for "off-diagonal" portion of matrix 5634 . oj - column indices 5635 - oa - matrix values 5636 5637 Output Parameter: 5638 . mat - the matrix 5639 5640 Level: advanced 5641 5642 Notes: 5643 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5644 must free the arrays once the matrix has been destroyed and not before. 5645 5646 The i and j indices are 0 based 5647 5648 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5649 5650 This sets local rows and cannot be used to set off-processor values. 5651 5652 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5653 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5654 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5655 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5656 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5657 communication if it is known that only local entries will be set. 
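
   Example usage:
   The sketch below is purely illustrative (the 4 by 4 matrix, its values, and the two-process layout are made up
   for this example). The "diagonal" block uses local column indices, while the "off-diagonal" block is created by
   this routine with the full global column dimension, so oj holds global column indices.
.vb
   Global matrix, rank 0 owning rows/columns 0-1 and rank 1 owning rows/columns 2-3:

        1  2  |  .  .
        .  3  |  .  4
        ------+------
        .  .  |  5  .
        .  .  |  .  6

   On rank 0 (the call on rank 1 is analogous):

     Mat         A;
     PetscInt    i[]  = {0,2,3}, j[]  = {0,1,1}, oi[] = {0,0,1}, oj[] = {3};
     PetscScalar a[]  = {1.0,2.0,3.0}, oa[] = {4.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   As noted above, the arrays must remain valid until the matrix has been destroyed.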
5658 5659 .keywords: matrix, aij, compressed row, sparse, parallel 5660 5661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5662 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5663 @*/ 5664 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5665 { 5666 PetscErrorCode ierr; 5667 Mat_MPIAIJ *maij; 5668 5669 PetscFunctionBegin; 5670 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5671 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5672 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5673 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5674 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5675 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5676 maij = (Mat_MPIAIJ*) (*mat)->data; 5677 5678 (*mat)->preallocated = PETSC_TRUE; 5679 5680 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5681 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5682 5683 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5684 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5685 5686 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 5691 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5694 PetscFunctionReturn(0); 5695 } 5696 5697 /* 5698 Special version for direct calls from Fortran 5699 */ 5700 #include <petsc-private/fortranimpl.h> 5701 5702 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5706 #endif 5707 5708 /* Change these macros so can be used in void function */ 5709 #undef CHKERRQ 5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5711 #undef SETERRQ2 5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5713 #undef SETERRQ3 5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5715 #undef SETERRQ 5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5717 5718 #undef __FUNCT__ 5719 #define __FUNCT__ "matsetvaluesmpiaij_" 5720 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5721 { 5722 Mat mat = *mmat; 5723 PetscInt m = *mm, n = *mn; 5724 InsertMode addv = *maddv; 5725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5726 PetscScalar value; 5727 PetscErrorCode ierr; 5728 5729 MatCheckPreallocated(mat,1); 5730 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5731 5732 #if defined(PETSC_USE_DEBUG) 5733 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5734 #endif 5735 { 5736 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5737 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5738 PetscBool roworiented = aij->roworiented; 5739 5740 /* Some Variables required in the macro */ 5741 Mat A = aij->A; 5742 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5743 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5744 MatScalar *aa = a->a; 5745 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5746 Mat B = aij->B; 5747 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5748 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5749 MatScalar *ba = b->a; 5750 5751 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5752 PetscInt nonew = a->nonew; 5753 MatScalar *ap1,*ap2; 5754 5755 PetscFunctionBegin; 5756 for (i=0; i<m; i++) { 5757 if (im[i] < 0) continue; 5758 #if defined(PETSC_USE_DEBUG) 5759 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5760 #endif 5761 if (im[i] >= rstart && im[i] < rend) { 5762 row = im[i] - rstart; 5763 lastcol1 = -1; 5764 rp1 = aj + ai[row]; 5765 ap1 = aa + ai[row]; 5766 rmax1 = aimax[row]; 5767 nrow1 = ailen[row]; 5768 low1 = 0; 5769 high1 = nrow1; 5770 lastcol2 = -1; 5771 rp2 = bj + bi[row]; 5772 ap2 = ba + bi[row]; 5773 rmax2 = bimax[row]; 5774 nrow2 = bilen[row]; 5775 low2 = 0; 5776 high2 = nrow2; 5777 5778 for (j=0; j<n; j++) { 5779 if (roworiented) value = v[i*n+j]; 5780 else value = v[i+j*m]; 5781 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5782 if (in[j] >= cstart && in[j] < cend) { 5783 col = in[j] - cstart; 5784 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5785 } else if (in[j] < 0) continue; 5786 #if defined(PETSC_USE_DEBUG) 5787 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5788 #endif 5789 else { 5790 if (mat->was_assembled) { 5791 if (!aij->colmap) { 5792 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5793 } 5794 #if defined(PETSC_USE_CTABLE) 5795 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5796 col--; 5797 #else 5798 col = aij->colmap[in[j]] - 1; 5799 #endif 5800 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5801 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5802 col = in[j]; 5803 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5804 B = aij->B; 5805 b = (Mat_SeqAIJ*)B->data; 5806 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5807 rp2 = bj + bi[row]; 5808 ap2 = ba + bi[row]; 5809 rmax2 = bimax[row]; 5810 nrow2 = bilen[row]; 5811 low2 = 0; 5812 high2 = nrow2; 5813 bm = aij->B->rmap->n; 5814 ba = b->a; 5815 } 5816 } else col = in[j]; 5817 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5818 } 5819 } 5820 } else if (!aij->donotstash) { 5821 if (roworiented) { 5822 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5823 } else { 5824 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5825 } 5826 } 5827 } 5828 } 5829 PetscFunctionReturnVoid(); 5830 } 5831 5832