#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
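/*
   Example usage (a minimal sketch added for illustration, not part of the original source): creating an
   AIJ matrix and calling both preallocation routines as the manual pages above recommend; the communicator
   comm, the global sizes M and N, and the per-row nonzero estimates (5 on-diagonal, 2 off-diagonal) are
   placeholders.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Only the routine matching the matrix type actually set takes effect; the other call is ignored, which is
   why calling both keeps the code independent of the number of processes.
*/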
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
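/*
   Example usage (a sketch added for illustration, not part of the original source): the column-norm kernel
   above is normally reached through the public MatGetColumnNorms() interface; the array passed in must have
   one entry for every global column.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/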
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order N integer array but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
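/*
   Example usage (a sketch added for illustration, not part of the original source): entries set with global
   indices owned by another process are stashed by MatSetValues_MPIAIJ() above and are only moved to their
   owners by the assembly routines that follow, so every insertion sequence must end with the assembly pair;
   grow, gcol and v are placeholders.

     ierr = MatSetValues(A,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/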
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves so that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt        n      = A->rmap->n;
  PetscMPIInt     size   = mat->size;
  PetscSF         sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt        lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
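/*
   Example usage (a sketch added for illustration, not part of the original source): the kernel above is
   reached through MatZeroRows(); passing the optional x and b vectors also adjusts the right-hand side so
   that the zeroed rows keep the solution values stored in x. The two global row numbers are placeholders.

     PetscInt zrows[2] = {0,5};
     ierr = MatZeroRows(A,2,zrows,1.0,x,b);CHKERRQ(ierr);
*/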
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij  = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
(*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1483 1484 /* local sweep */ 1485 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1486 } 1487 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1488 if (flag & SOR_ZERO_INITIAL_GUESS) { 1489 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1490 its--; 1491 } 1492 while (its--) { 1493 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 1496 /* update rhs: bb1 = bb - B*x */ 1497 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1498 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1499 1500 /* local sweep */ 1501 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1502 } 1503 } else if (flag & SOR_EISENSTAT) { 1504 Vec xx1; 1505 1506 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1507 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1508 1509 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 if (!mat->diag) { 1512 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1513 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1514 } 1515 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1516 if (hasop) { 1517 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1518 } else { 1519 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1520 } 1521 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1522 1523 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1524 1525 /* local sweep */ 1526 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1527 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1528 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1529 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1530 1531 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatPermute_MPIAIJ" 1537 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1538 { 1539 Mat aA,aB,Aperm; 1540 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1541 PetscScalar *aa,*ba; 1542 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1543 PetscSF rowsf,sf; 1544 IS parcolp = NULL; 1545 PetscBool done; 1546 PetscErrorCode ierr; 1547 1548 PetscFunctionBegin; 1549 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1550 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1551 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1552 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1553 1554 /* Invert row permutation to find out where my rows should go */ 1555 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1556 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1557 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1558 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1559 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1560 ierr = 
PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1561 1562 /* Invert column permutation to find out where my columns should go */ 1563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1564 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1565 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1566 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1567 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1568 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1569 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1570 1571 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1572 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1573 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1574 1575 /* Find out where my gcols should go */ 1576 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1577 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1578 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1579 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1580 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1581 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1583 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1584 1585 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1586 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1587 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1588 for (i=0; i<m; i++) { 1589 PetscInt row = rdest[i],rowner; 1590 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1591 for (j=ai[i]; j<ai[i+1]; j++) { 1592 PetscInt cowner,col = cdest[aj[j]]; 1593 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1594 if (rowner == cowner) dnnz[i]++; 1595 else onnz[i]++; 1596 } 1597 for (j=bi[i]; j<bi[i+1]; j++) { 1598 PetscInt cowner,col = gcdest[bj[j]]; 1599 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1600 if (rowner == cowner) dnnz[i]++; 1601 else onnz[i]++; 1602 } 1603 } 1604 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1605 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1606 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1607 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1608 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1609 1610 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1611 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1612 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1613 for (i=0; i<m; i++) { 1614 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1615 PetscInt j0,rowlen; 1616 rowlen = ai[i+1] - ai[i]; 1617 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1618 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1619 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1620 } 1621 rowlen = bi[i+1] - bi[i]; 1622 for (j0=j=0; j<rowlen; j0=j) { 1623 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1624 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1625 } 1626 } 1627 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1628 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1629 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1630 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1631 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1632 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1633 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1634 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1635 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1636 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1637 *B = Aperm; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 #undef __FUNCT__ 1642 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1643 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1644 { 1645 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1646 Mat A = mat->A,B = mat->B; 1647 PetscErrorCode ierr; 1648 PetscReal isend[5],irecv[5]; 1649 1650 PetscFunctionBegin; 1651 info->block_size = 1.0; 1652 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1653 1654 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1655 isend[3] = info->memory; isend[4] = info->mallocs; 1656 1657 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1658 1659 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1660 isend[3] += info->memory; isend[4] += info->mallocs; 1661 if (flag == MAT_LOCAL) { 1662 info->nz_used = isend[0]; 1663 info->nz_allocated = isend[1]; 1664 info->nz_unneeded = isend[2]; 1665 info->memory = isend[3]; 1666 info->mallocs = isend[4]; 1667 } else if (flag == MAT_GLOBAL_MAX) { 1668 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1669 1670 info->nz_used = irecv[0]; 1671 info->nz_allocated = irecv[1]; 1672 info->nz_unneeded = irecv[2]; 1673 info->memory = irecv[3]; 1674 info->mallocs = irecv[4]; 1675 } else if (flag == MAT_GLOBAL_SUM) { 1676 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1677 1678 info->nz_used = irecv[0]; 1679 info->nz_allocated = irecv[1]; 1680 info->nz_unneeded = irecv[2]; 1681 info->memory = irecv[3]; 1682 info->mallocs = irecv[4]; 1683 } 1684 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1685 info->fill_ratio_needed = 0; 1686 info->factor_mallocs = 0; 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatSetOption_MPIAIJ" 1692 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1693 { 1694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1695 PetscErrorCode ierr; 1696 1697 PetscFunctionBegin; 1698 switch (op) { 1699 case MAT_NEW_NONZERO_LOCATIONS: 1700 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1701 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1702 case MAT_KEEP_NONZERO_PATTERN: 1703 case MAT_NEW_NONZERO_LOCATION_ERR: 1704 case MAT_USE_INODES: 1705 case MAT_IGNORE_ZERO_ENTRIES: 1706 MatCheckPreallocated(A,1); 1707 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1708 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1709 break; 1710 case MAT_ROW_ORIENTED: 1711 a->roworiented = flg; 1712 1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1715 break; 1716 case MAT_NEW_DIAGONALS: 1717 ierr = PetscInfo1(A,"Option %s 
ignored\n",MatOptions[op]);CHKERRQ(ierr); 1718 break; 1719 case MAT_IGNORE_OFF_PROC_ENTRIES: 1720 a->donotstash = flg; 1721 break; 1722 case MAT_SPD: 1723 A->spd_set = PETSC_TRUE; 1724 A->spd = flg; 1725 if (flg) { 1726 A->symmetric = PETSC_TRUE; 1727 A->structurally_symmetric = PETSC_TRUE; 1728 A->symmetric_set = PETSC_TRUE; 1729 A->structurally_symmetric_set = PETSC_TRUE; 1730 } 1731 break; 1732 case MAT_SYMMETRIC: 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_STRUCTURALLY_SYMMETRIC: 1736 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1737 break; 1738 case MAT_HERMITIAN: 1739 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 break; 1741 case MAT_SYMMETRY_ETERNAL: 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 default: 1745 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1746 } 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatGetRow_MPIAIJ" 1752 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1753 { 1754 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1755 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1756 PetscErrorCode ierr; 1757 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1758 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1759 PetscInt *cmap,*idx_p; 1760 1761 PetscFunctionBegin; 1762 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1763 mat->getrowactive = PETSC_TRUE; 1764 1765 if (!mat->rowvalues && (idx || v)) { 1766 /* 1767 allocate enough space to hold information from the longest row. 1768 */ 1769 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1770 PetscInt max = 1,tmp; 1771 for (i=0; i<matin->rmap->n; i++) { 1772 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1773 if (max < tmp) max = tmp; 1774 } 1775 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1776 } 1777 1778 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1779 lrow = row - rstart; 1780 1781 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1782 if (!v) {pvA = 0; pvB = 0;} 1783 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1784 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1785 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1786 nztot = nzA + nzB; 1787 1788 cmap = mat->garray; 1789 if (v || idx) { 1790 if (nztot) { 1791 /* Sort by increasing column numbers, assuming A and B already sorted */ 1792 PetscInt imark = -1; 1793 if (v) { 1794 *v = v_p = mat->rowvalues; 1795 for (i=0; i<nzB; i++) { 1796 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1797 else break; 1798 } 1799 imark = i; 1800 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1801 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1802 } 1803 if (idx) { 1804 *idx = idx_p = mat->rowindices; 1805 if (imark > -1) { 1806 for (i=0; i<imark; i++) { 1807 idx_p[i] = cmap[cworkB[i]]; 1808 } 1809 } else { 1810 for (i=0; i<nzB; i++) { 1811 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1812 else break; 1813 } 1814 imark = i; 1815 } 1816 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1817 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1818 } 1819 } else { 1820 if (idx) *idx = 0; 1821 if (v) *v = 0; 1822 } 1823 } 1824 *nz = nztot; 1825 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1826 
ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1832 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1833 { 1834 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1835 1836 PetscFunctionBegin; 1837 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1838 aij->getrowactive = PETSC_FALSE; 1839 PetscFunctionReturn(0); 1840 } 1841 1842 #undef __FUNCT__ 1843 #define __FUNCT__ "MatNorm_MPIAIJ" 1844 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1845 { 1846 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1847 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1848 PetscErrorCode ierr; 1849 PetscInt i,j,cstart = mat->cmap->rstart; 1850 PetscReal sum = 0.0; 1851 MatScalar *v; 1852 1853 PetscFunctionBegin; 1854 if (aij->size == 1) { 1855 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1856 } else { 1857 if (type == NORM_FROBENIUS) { 1858 v = amat->a; 1859 for (i=0; i<amat->nz; i++) { 1860 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1861 } 1862 v = bmat->a; 1863 for (i=0; i<bmat->nz; i++) { 1864 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1865 } 1866 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1867 *norm = PetscSqrtReal(*norm); 1868 } else if (type == NORM_1) { /* max column norm */ 1869 PetscReal *tmp,*tmp2; 1870 PetscInt *jj,*garray = aij->garray; 1871 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1872 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1873 *norm = 0.0; 1874 v = amat->a; jj = amat->j; 1875 for (j=0; j<amat->nz; j++) { 1876 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1877 } 1878 v = bmat->a; jj = bmat->j; 1879 for (j=0; j<bmat->nz; j++) { 1880 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1881 } 1882 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1883 for (j=0; j<mat->cmap->N; j++) { 1884 if (tmp2[j] > *norm) *norm = tmp2[j]; 1885 } 1886 ierr = PetscFree(tmp);CHKERRQ(ierr); 1887 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1888 } else if (type == NORM_INFINITY) { /* max row norm */ 1889 PetscReal ntemp = 0.0; 1890 for (j=0; j<aij->A->rmap->n; j++) { 1891 v = amat->a + amat->i[j]; 1892 sum = 0.0; 1893 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1894 sum += PetscAbsScalar(*v); v++; 1895 } 1896 v = bmat->a + bmat->i[j]; 1897 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1898 sum += PetscAbsScalar(*v); v++; 1899 } 1900 if (sum > ntemp) ntemp = sum; 1901 } 1902 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1903 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatTranspose_MPIAIJ" 1910 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1911 { 1912 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1913 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1914 PetscErrorCode ierr; 1915 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1916 PetscInt cstart = A->cmap->rstart,ncol; 1917 Mat B; 1918 MatScalar *array; 1919 1920 PetscFunctionBegin; 1921 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) 
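/* an in-place transpose (A == *matout) swaps the row and column layouts, so it is only supported when the matrix is globally square */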
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1922 1923 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1924 ai = Aloc->i; aj = Aloc->j; 1925 bi = Bloc->i; bj = Bloc->j; 1926 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1927 PetscInt *d_nnz,*g_nnz,*o_nnz; 1928 PetscSFNode *oloc; 1929 PETSC_UNUSED PetscSF sf; 1930 1931 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1932 /* compute d_nnz for preallocation */ 1933 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1934 for (i=0; i<ai[ma]; i++) { 1935 d_nnz[aj[i]]++; 1936 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1937 } 1938 /* compute local off-diagonal contributions */ 1939 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1940 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1941 /* map those to global */ 1942 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1943 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1944 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1945 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1946 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1947 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1948 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1949 1950 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1951 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1952 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1953 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1954 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1955 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1956 } else { 1957 B = *matout; 1958 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1959 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1960 } 1961 1962 /* copy over the A part */ 1963 array = Aloc->a; 1964 row = A->rmap->rstart; 1965 for (i=0; i<ma; i++) { 1966 ncol = ai[i+1]-ai[i]; 1967 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1968 row++; 1969 array += ncol; aj += ncol; 1970 } 1971 aj = Aloc->j; 1972 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1973 1974 /* copy over the B part */ 1975 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1976 array = Bloc->a; 1977 row = A->rmap->rstart; 1978 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1979 cols_tmp = cols; 1980 for (i=0; i<mb; i++) { 1981 ncol = bi[i+1]-bi[i]; 1982 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1983 row++; 1984 array += ncol; cols_tmp += ncol; 1985 } 1986 ierr = PetscFree(cols);CHKERRQ(ierr); 1987 1988 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1989 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1990 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1991 *matout = B; 1992 } else { 1993 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2000 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2001 { 2002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2003 Mat a = aij->A,b = aij->B; 2004 PetscErrorCode ierr; 2005 PetscInt s1,s2,s3; 2006 2007 PetscFunctionBegin; 2008 ierr = 
MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2009 if (rr) { 2010 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2011 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2012 /* Overlap communication with computation. */ 2013 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2014 } 2015 if (ll) { 2016 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2017 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2018 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2019 } 2020 /* scale the diagonal block */ 2021 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2022 2023 if (rr) { 2024 /* Do a scatter end and then right scale the off-diagonal block */ 2025 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2026 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2033 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2034 { 2035 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2036 PetscErrorCode ierr; 2037 2038 PetscFunctionBegin; 2039 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 #undef __FUNCT__ 2044 #define __FUNCT__ "MatEqual_MPIAIJ" 2045 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2046 { 2047 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2048 Mat a,b,c,d; 2049 PetscBool flg; 2050 PetscErrorCode ierr; 2051 2052 PetscFunctionBegin; 2053 a = matA->A; b = matA->B; 2054 c = matB->A; d = matB->B; 2055 2056 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2057 if (flg) { 2058 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2059 } 2060 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatCopy_MPIAIJ" 2066 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2067 { 2068 PetscErrorCode ierr; 2069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2070 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2071 2072 PetscFunctionBegin; 2073 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2074 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2075 /* because of the column compression in the off-processor part of the matrix a->B, 2076 the number of columns in a->B and b->B may be different, hence we cannot call 2077 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2078 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2079 then copying the submatrices */ 2080 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2081 } else { 2082 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2083 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2084 } 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatSetUp_MPIAIJ" 2090 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2091 { 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2101 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2102 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2103 { 2104 PetscInt i,m=Y->rmap->N; 2105 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2106 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2107 const PetscInt *xi = x->i,*yi = y->i; 2108 2109 PetscFunctionBegin; 2110 /* Set the number of nonzeros in the new matrix */ 2111 for (i=0; i<m; i++) { 2112 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2113 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2114 nnz[i] = 0; 2115 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2116 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2117 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2118 nnz[i]++; 2119 } 2120 for (; k<nzy; k++) nnz[i]++; 2121 } 2122 PetscFunctionReturn(0); 2123 } 2124 2125 #undef __FUNCT__ 2126 #define __FUNCT__ "MatAXPY_MPIAIJ" 2127 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2128 { 2129 PetscErrorCode ierr; 2130 PetscInt i; 2131 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2132 PetscBLASInt bnz,one=1; 2133 Mat_SeqAIJ *x,*y; 2134 2135 PetscFunctionBegin; 2136 if (str == SAME_NONZERO_PATTERN) { 2137 PetscScalar alpha = a; 2138 x = (Mat_SeqAIJ*)xx->A->data; 2139 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2140 y = (Mat_SeqAIJ*)yy->A->data; 2141 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2142 x = (Mat_SeqAIJ*)xx->B->data; 2143 y = (Mat_SeqAIJ*)yy->B->data; 2144 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2145 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2146 } else if (str == SUBSET_NONZERO_PATTERN) { 2147 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2148 2149 x = (Mat_SeqAIJ*)xx->B->data; 2150 y = (Mat_SeqAIJ*)yy->B->data; 2151 if (y->xtoy && y->XtoY != xx->B) { 2152 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2153 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2154 } 2155 if (!y->xtoy) { /* get xtoy */ 2156 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2157 y->XtoY = xx->B; 2158 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2159 } 2160 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2161 } else { 2162 Mat B; 2163 PetscInt *nnz_d,*nnz_o; 2164 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2165 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2166 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2167 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2168 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2169 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2170 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2171 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2172 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2173 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2174 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2175 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2176 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2177 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2178 } 2179 PetscFunctionReturn(0); 2180 } 2181 2182 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2183 2184 #undef __FUNCT__ 2185 #define __FUNCT__ "MatConjugate_MPIAIJ" 2186 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2187 { 2188 #if defined(PETSC_USE_COMPLEX) 2189 PetscErrorCode ierr; 2190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2191 2192 PetscFunctionBegin; 2193 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2194 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2195 #else 2196 PetscFunctionBegin; 2197 #endif 2198 PetscFunctionReturn(0); 2199 } 2200 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatRealPart_MPIAIJ" 2203 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2204 { 2205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2206 PetscErrorCode ierr; 2207 2208 PetscFunctionBegin; 2209 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2210 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2216 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2217 { 2218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2223 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #if defined(PETSC_HAVE_PBGL) 2228 2229 #include <boost/parallel/mpi/bsp_process_group.hpp> 2230 #include <boost/graph/distributed/ilu_default_graph.hpp> 2231 #include <boost/graph/distributed/ilu_0_block.hpp> 2232 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2233 #include <boost/graph/distributed/petsc/interface.hpp> 2234 #include <boost/multi_array.hpp> 2235 #include <boost/parallel/distributed_property_map->hpp> 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2239 /* 2240 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2241 */ 2242 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2243 { 2244 namespace petsc = boost::distributed::petsc; 2245 2246 namespace graph_dist = boost::graph::distributed; 2247 using boost::graph::distributed::ilu_default::process_group_type; 2248 using boost::graph::ilu_permuted; 2249 2250 PetscBool row_identity, col_identity; 2251 PetscContainer c; 2252 PetscInt m, n, M, N; 2253 PetscErrorCode ierr; 2254 2255 PetscFunctionBegin; 2256 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2257 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2258 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2259 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2260 2261 process_group_type pg; 2262 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2263 lgraph_type *lgraph_p = 
new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2264 lgraph_type& level_graph = *lgraph_p; 2265 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2266 2267 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2268 ilu_permuted(level_graph); 2269 2270 /* put together the new matrix */ 2271 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2272 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2273 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2274 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2275 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2276 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2277 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2278 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2279 2280 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2281 ierr = PetscContainerSetPointer(c, lgraph_p); 2282 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2283 ierr = PetscContainerDestroy(&c); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2289 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2290 { 2291 PetscFunctionBegin; 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatSolve_MPIAIJ" 2297 /* 2298 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2299 */ 2300 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2301 { 2302 namespace graph_dist = boost::graph::distributed; 2303 2304 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2305 lgraph_type *lgraph_p; 2306 PetscContainer c; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2311 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2312 ierr = VecCopy(b, x);CHKERRQ(ierr); 2313 2314 PetscScalar *array_x; 2315 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2316 PetscInt sx; 2317 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2318 2319 PetscScalar *array_b; 2320 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2321 PetscInt sb; 2322 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2323 2324 lgraph_type& level_graph = *lgraph_p; 2325 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2326 2327 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2328 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2329 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2330 2331 typedef boost::iterator_property_map<array_ref_type::iterator, 2332 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2333 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2334 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2335 2336 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2337 PetscFunctionReturn(0); 2338 } 2339 #endif 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatDestroy_MatRedundant" 2343 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2344 { 2345 PetscErrorCode ierr; 2346 Mat_Redundant *redund; 2347 PetscInt i; 2348 PetscMPIInt size; 2349 2350 PetscFunctionBegin; 2351 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2352 if (size == 1) { 2353 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2354 redund = a->redundant; 2355 } else { 
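/* parallel case: the Mat_Redundant context is stored on the MPIAIJ data structure
        (it was attached there by MatGetRedundantMatrix_MPIAIJ()) */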
2356 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2357 redund = a->redundant; 2358 } 2359 if (redund){ 2360 if (redund->matseq) { /* via MatGetSubMatrices() */ 2361 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2362 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2363 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2364 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2365 } else { 2366 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2367 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2368 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2369 for (i=0; i<redund->nrecvs; i++) { 2370 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2371 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2372 } 2373 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2374 } 2375 2376 if (redund->psubcomm) { 2377 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2378 } 2379 ierr = redund->Destroy(A);CHKERRQ(ierr); 2380 ierr = PetscFree(redund);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2387 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2388 { 2389 PetscMPIInt rank,size; 2390 MPI_Comm comm; 2391 PetscErrorCode ierr; 2392 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2393 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2394 PetscInt *rowrange = mat->rmap->range; 2395 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2396 Mat A = aij->A,B=aij->B,C=*matredundant; 2397 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2398 PetscScalar *sbuf_a; 2399 PetscInt nzlocal=a->nz+b->nz; 2400 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2401 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2402 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2403 MatScalar *aworkA,*aworkB; 2404 PetscScalar *vals; 2405 PetscMPIInt tag1,tag2,tag3,imdex; 2406 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2407 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2408 MPI_Status recv_status,*send_status; 2409 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2410 PetscInt **rbuf_j=NULL; 2411 PetscScalar **rbuf_a=NULL; 2412 Mat_Redundant *redund =NULL; 2413 2414 PetscFunctionBegin; 2415 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2416 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2417 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2418 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2419 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2420 2421 if (reuse == MAT_REUSE_MATRIX) { 2422 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2423 if (subsize == 1) { 2424 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2425 redund = c->redundant; 2426 } else { 2427 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2428 redund = c->redundant; 2429 } 2430 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2431 2432 nsends = redund->nsends; 2433 nrecvs = redund->nrecvs; 2434 send_rank = redund->send_rank; 2435 recv_rank = redund->recv_rank; 2436 sbuf_nz = redund->sbuf_nz; 2437 rbuf_nz = redund->rbuf_nz; 2438 sbuf_j = redund->sbuf_j; 2439 sbuf_a = redund->sbuf_a; 2440 rbuf_j = redund->rbuf_j; 2441 rbuf_a = redund->rbuf_a; 2442 } 2443 2444 if (reuse == MAT_INITIAL_MATRIX) { 2445 PetscInt nleftover,np_subcomm; 2446 2447 /* get the destination processors' id send_rank, nsends and nrecvs */ 2448 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2449 2450 np_subcomm = size/nsubcomm; 2451 nleftover = size - nsubcomm*np_subcomm; 2452 2453 /* block of codes below is specific for INTERLACED */ 2454 /* ------------------------------------------------*/ 2455 nsends = 0; nrecvs = 0; 2456 for (i=0; i<size; i++) { 2457 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2458 send_rank[nsends++] = i; 2459 recv_rank[nrecvs++] = i; 2460 } 2461 } 2462 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2463 i = size-nleftover-1; 2464 j = 0; 2465 while (j < nsubcomm - nleftover) { 2466 send_rank[nsends++] = i; 2467 i--; j++; 2468 } 2469 } 2470 2471 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2472 for (i=0; i<nleftover; i++) { 2473 recv_rank[nrecvs++] = size-nleftover+i; 2474 } 2475 } 2476 /*----------------------------------------------*/ 2477 2478 /* allocate sbuf_j, sbuf_a */ 2479 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2480 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2481 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2482 /* 2483 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2484 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2485 */ 2486 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2487 2488 /* copy mat's local entries into the buffers */ 2489 if (reuse == MAT_INITIAL_MATRIX) { 2490 rownz_max = 0; 2491 rptr = sbuf_j; 2492 cols = sbuf_j + rend-rstart + 1; 2493 vals = sbuf_a; 2494 rptr[0] = 0; 2495 for (i=0; i<rend-rstart; i++) { 2496 row = i + rstart; 2497 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2498 ncols = nzA + nzB; 2499 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2500 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2501 /* load the column indices for this row into cols */ 2502 lwrite = 0; 2503 for (l=0; l<nzB; l++) { 2504 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2505 vals[lwrite] = aworkB[l]; 2506 cols[lwrite++] = ctmp; 2507 } 2508 } 2509 for (l=0; l<nzA; l++) { 2510 vals[lwrite] = aworkA[l]; 2511 cols[lwrite++] = cstart + cworkA[l]; 2512 } 2513 for (l=0; l<nzB; l++) { 2514 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2515 vals[lwrite] = aworkB[l]; 2516 cols[lwrite++] = ctmp; 2517 } 2518 } 2519 vals += ncols; 2520 cols += ncols; 2521 rptr[i+1] = rptr[i] + ncols; 2522 if (rownz_max < ncols) rownz_max = ncols; 2523 } 2524 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2525 } else { /* only copy matrix values into sbuf_a */ 2526 rptr = sbuf_j; 2527 vals = sbuf_a; 2528 rptr[0] = 0; 2529 for (i=0; i<rend-rstart; i++) { 2530 row = i + rstart; 2531 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2532 ncols = nzA + nzB; 2533 cworkB = b->j + b->i[i]; 2534 aworkA = a->a + a->i[i]; 2535 aworkB = b->a + b->i[i]; 2536 lwrite = 0; 2537 for (l=0; 
l<nzB; l++) { 2538 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2539 } 2540 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2541 for (l=0; l<nzB; l++) { 2542 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2543 } 2544 vals += ncols; 2545 rptr[i+1] = rptr[i] + ncols; 2546 } 2547 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2548 2549 /* send nzlocal to others, and recv other's nzlocal */ 2550 /*--------------------------------------------------*/ 2551 if (reuse == MAT_INITIAL_MATRIX) { 2552 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2553 2554 s_waits2 = s_waits3 + nsends; 2555 s_waits1 = s_waits2 + nsends; 2556 r_waits1 = s_waits1 + nsends; 2557 r_waits2 = r_waits1 + nrecvs; 2558 r_waits3 = r_waits2 + nrecvs; 2559 } else { 2560 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2561 2562 r_waits3 = s_waits3 + nsends; 2563 } 2564 2565 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2566 if (reuse == MAT_INITIAL_MATRIX) { 2567 /* get new tags to keep the communication clean */ 2568 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2569 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2570 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2571 2572 /* post receives of other's nzlocal */ 2573 for (i=0; i<nrecvs; i++) { 2574 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2575 } 2576 /* send nzlocal to others */ 2577 for (i=0; i<nsends; i++) { 2578 sbuf_nz[i] = nzlocal; 2579 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2580 } 2581 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2582 count = nrecvs; 2583 while (count) { 2584 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2585 2586 recv_rank[imdex] = recv_status.MPI_SOURCE; 2587 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2588 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2589 2590 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2591 2592 rbuf_nz[imdex] += i + 2; 2593 2594 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2595 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2596 count--; 2597 } 2598 /* wait on sends of nzlocal */ 2599 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2600 /* send mat->i,j to others, and recv from other's */ 2601 /*------------------------------------------------*/ 2602 for (i=0; i<nsends; i++) { 2603 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2604 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2605 } 2606 /* wait on receives of mat->i,j */ 2607 /*------------------------------*/ 2608 count = nrecvs; 2609 while (count) { 2610 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2611 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2612 count--; 2613 } 2614 /* wait on sends of mat->i,j */ 2615 /*---------------------------*/ 2616 if (nsends) { 2617 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2618 } 2619 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2620 2621 /* post 
receives, send and receive mat->a */ 2622 /*----------------------------------------*/ 2623 for (imdex=0; imdex<nrecvs; imdex++) { 2624 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2625 } 2626 for (i=0; i<nsends; i++) { 2627 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2628 } 2629 count = nrecvs; 2630 while (count) { 2631 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2632 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2633 count--; 2634 } 2635 if (nsends) { 2636 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2637 } 2638 2639 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2640 2641 /* create redundant matrix */ 2642 /*-------------------------*/ 2643 if (reuse == MAT_INITIAL_MATRIX) { 2644 const PetscInt *range; 2645 PetscInt rstart_sub,rend_sub,mloc_sub; 2646 2647 /* compute rownz_max for preallocation */ 2648 for (imdex=0; imdex<nrecvs; imdex++) { 2649 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2650 rptr = rbuf_j[imdex]; 2651 for (i=0; i<j; i++) { 2652 ncols = rptr[i+1] - rptr[i]; 2653 if (rownz_max < ncols) rownz_max = ncols; 2654 } 2655 } 2656 2657 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2658 2659 /* get local size of redundant matrix 2660 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2661 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2662 rstart_sub = range[nsubcomm*subrank]; 2663 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2664 rend_sub = range[nsubcomm*(subrank+1)]; 2665 } else { 2666 rend_sub = mat->rmap->N; 2667 } 2668 mloc_sub = rend_sub - rstart_sub; 2669 2670 if (M == N) { 2671 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2672 } else { /* non-square matrix */ 2673 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2674 } 2675 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2676 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2677 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2678 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2679 } else { 2680 C = *matredundant; 2681 } 2682 2683 /* insert local matrix entries */ 2684 rptr = sbuf_j; 2685 cols = sbuf_j + rend-rstart + 1; 2686 vals = sbuf_a; 2687 for (i=0; i<rend-rstart; i++) { 2688 row = i + rstart; 2689 ncols = rptr[i+1] - rptr[i]; 2690 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2691 vals += ncols; 2692 cols += ncols; 2693 } 2694 /* insert received matrix entries */ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 rstart = rowrange[recv_rank[imdex]]; 2697 rend = rowrange[recv_rank[imdex]+1]; 2698 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2699 rptr = rbuf_j[imdex]; 2700 cols = rbuf_j[imdex] + rend-rstart + 1; 2701 vals = rbuf_a[imdex]; 2702 for (i=0; i<rend-rstart; i++) { 2703 row = i + rstart; 2704 ncols = rptr[i+1] - rptr[i]; 2705 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2706 vals += ncols; 2707 cols += ncols; 2708 } 2709 } 2710 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2712 2713 if (reuse == MAT_INITIAL_MATRIX) { 2714 *matredundant = C; 2715 2716 /* create a 
supporting struct and attach it to C for reuse */ 2717 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2718 if (subsize == 1) { 2719 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2720 c->redundant = redund; 2721 } else { 2722 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2723 c->redundant = redund; 2724 } 2725 2726 redund->nzlocal = nzlocal; 2727 redund->nsends = nsends; 2728 redund->nrecvs = nrecvs; 2729 redund->send_rank = send_rank; 2730 redund->recv_rank = recv_rank; 2731 redund->sbuf_nz = sbuf_nz; 2732 redund->rbuf_nz = rbuf_nz; 2733 redund->sbuf_j = sbuf_j; 2734 redund->sbuf_a = sbuf_a; 2735 redund->rbuf_j = rbuf_j; 2736 redund->rbuf_a = rbuf_a; 2737 redund->psubcomm = NULL; 2738 2739 redund->Destroy = C->ops->destroy; 2740 C->ops->destroy = MatDestroy_MatRedundant; 2741 } 2742 PetscFunctionReturn(0); 2743 } 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2747 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2748 { 2749 PetscErrorCode ierr; 2750 MPI_Comm comm; 2751 PetscMPIInt size,subsize; 2752 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2753 Mat_Redundant *redund=NULL; 2754 PetscSubcomm psubcomm=NULL; 2755 MPI_Comm subcomm_in=subcomm; 2756 Mat *matseq; 2757 IS isrow,iscol; 2758 2759 PetscFunctionBegin; 2760 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2761 if (reuse == MAT_INITIAL_MATRIX) { 2762 /* create psubcomm, then get subcomm */ 2763 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2764 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2765 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2766 2767 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2768 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2769 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2770 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2771 subcomm = psubcomm->comm; 2772 } else { /* retrieve psubcomm and subcomm */ 2773 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2774 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2775 if (subsize == 1) { 2776 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2777 redund = c->redundant; 2778 } else { 2779 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2780 redund = c->redundant; 2781 } 2782 psubcomm = redund->psubcomm; 2783 } 2784 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2785 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2786 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2787 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2788 if (subsize == 1) { 2789 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2790 c->redundant->psubcomm = psubcomm; 2791 } else { 2792 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2793 c->redundant->psubcomm = psubcomm ; 2794 } 2795 } 2796 PetscFunctionReturn(0); 2797 } 2798 } 2799 2800 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2801 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2802 if (reuse == MAT_INITIAL_MATRIX) { 2803 /* create a local sequential matrix matseq[0] */ 2804 mloc_sub = PETSC_DECIDE; 2805 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2806 ierr = 
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2807 rstart = rend - mloc_sub; 2808 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2809 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2810 } else { /* reuse == MAT_REUSE_MATRIX */ 2811 if (subsize == 1) { 2812 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2813 redund = c->redundant; 2814 } else { 2815 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2816 redund = c->redundant; 2817 } 2818 2819 isrow = redund->isrow; 2820 iscol = redund->iscol; 2821 matseq = redund->matseq; 2822 } 2823 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2824 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2825 2826 if (reuse == MAT_INITIAL_MATRIX) { 2827 /* create a supporting struct and attach it to C for reuse */ 2828 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2829 if (subsize == 1) { 2830 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2831 c->redundant = redund; 2832 } else { 2833 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2834 c->redundant = redund; 2835 } 2836 redund->isrow = isrow; 2837 redund->iscol = iscol; 2838 redund->matseq = matseq; 2839 redund->psubcomm = psubcomm; 2840 redund->Destroy = (*matredundant)->ops->destroy; 2841 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2842 } 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2848 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2849 { 2850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2851 PetscErrorCode ierr; 2852 PetscInt i,*idxb = 0; 2853 PetscScalar *va,*vb; 2854 Vec vtmp; 2855 2856 PetscFunctionBegin; 2857 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2858 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2859 if (idx) { 2860 for (i=0; i<A->rmap->n; i++) { 2861 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2862 } 2863 } 2864 2865 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2866 if (idx) { 2867 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2868 } 2869 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2870 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2871 2872 for (i=0; i<A->rmap->n; i++) { 2873 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2874 va[i] = vb[i]; 2875 if (idx) idx[i] = a->garray[idxb[i]]; 2876 } 2877 } 2878 2879 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2880 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2881 ierr = PetscFree(idxb);CHKERRQ(ierr); 2882 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2883 PetscFunctionReturn(0); 2884 } 2885 2886 #undef __FUNCT__ 2887 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2888 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2889 { 2890 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2891 PetscErrorCode ierr; 2892 PetscInt i,*idxb = 0; 2893 PetscScalar *va,*vb; 2894 Vec vtmp; 2895 2896 PetscFunctionBegin; 2897 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2898 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2899 if (idx) { 2900 for (i=0; i<A->rmap->n; i++) { 2901 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2902 } 2903 } 2904 2905 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2906 if (idx) { 2907 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2908 } 2909 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2910 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2911 2912 for (i=0; i<A->rmap->n; i++)
{ 2913 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2914 va[i] = vb[i]; 2915 if (idx) idx[i] = a->garray[idxb[i]]; 2916 } 2917 } 2918 2919 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2920 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2921 ierr = PetscFree(idxb);CHKERRQ(ierr); 2922 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2923 PetscFunctionReturn(0); 2924 } 2925 2926 #undef __FUNCT__ 2927 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2928 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2929 { 2930 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2931 PetscInt n = A->rmap->n; 2932 PetscInt cstart = A->cmap->rstart; 2933 PetscInt *cmap = mat->garray; 2934 PetscInt *diagIdx, *offdiagIdx; 2935 Vec diagV, offdiagV; 2936 PetscScalar *a, *diagA, *offdiagA; 2937 PetscInt r; 2938 PetscErrorCode ierr; 2939 2940 PetscFunctionBegin; 2941 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2942 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2943 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2944 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2945 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2946 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2947 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2948 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2949 for (r = 0; r < n; ++r) { 2950 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2951 a[r] = diagA[r]; 2952 idx[r] = cstart + diagIdx[r]; 2953 } else { 2954 a[r] = offdiagA[r]; 2955 idx[r] = cmap[offdiagIdx[r]]; 2956 } 2957 } 2958 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2959 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2960 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2961 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2962 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2963 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2964 PetscFunctionReturn(0); 2965 } 2966 2967 #undef __FUNCT__ 2968 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2969 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2970 { 2971 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2972 PetscInt n = A->rmap->n; 2973 PetscInt cstart = A->cmap->rstart; 2974 PetscInt *cmap = mat->garray; 2975 PetscInt *diagIdx, *offdiagIdx; 2976 Vec diagV, offdiagV; 2977 PetscScalar *a, *diagA, *offdiagA; 2978 PetscInt r; 2979 PetscErrorCode ierr; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2983 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2984 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2985 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2986 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2987 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2988 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2989 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2990 for (r = 0; r < n; ++r) { 2991 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2992 a[r] = diagA[r]; 2993 idx[r] = cstart + diagIdx[r]; 2994 } else { 2995 a[r] = offdiagA[r]; 2996 idx[r] = cmap[offdiagIdx[r]]; 2997 } 2998 } 2999 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3000 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3001 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3002 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3003 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3004 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3005 
PetscFunctionReturn(0); 3006 } 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3010 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3011 { 3012 PetscErrorCode ierr; 3013 Mat *dummy; 3014 3015 PetscFunctionBegin; 3016 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3017 *newmat = *dummy; 3018 ierr = PetscFree(dummy);CHKERRQ(ierr); 3019 PetscFunctionReturn(0); 3020 } 3021 3022 #undef __FUNCT__ 3023 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3024 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3025 { 3026 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3031 PetscFunctionReturn(0); 3032 } 3033 3034 #undef __FUNCT__ 3035 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3036 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3037 { 3038 PetscErrorCode ierr; 3039 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3040 3041 PetscFunctionBegin; 3042 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3043 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3044 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3046 PetscFunctionReturn(0); 3047 } 3048 3049 /* -------------------------------------------------------------------*/ 3050 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3051 MatGetRow_MPIAIJ, 3052 MatRestoreRow_MPIAIJ, 3053 MatMult_MPIAIJ, 3054 /* 4*/ MatMultAdd_MPIAIJ, 3055 MatMultTranspose_MPIAIJ, 3056 MatMultTransposeAdd_MPIAIJ, 3057 #if defined(PETSC_HAVE_PBGL) 3058 MatSolve_MPIAIJ, 3059 #else 3060 0, 3061 #endif 3062 0, 3063 0, 3064 /*10*/ 0, 3065 0, 3066 0, 3067 MatSOR_MPIAIJ, 3068 MatTranspose_MPIAIJ, 3069 /*15*/ MatGetInfo_MPIAIJ, 3070 MatEqual_MPIAIJ, 3071 MatGetDiagonal_MPIAIJ, 3072 MatDiagonalScale_MPIAIJ, 3073 MatNorm_MPIAIJ, 3074 /*20*/ MatAssemblyBegin_MPIAIJ, 3075 MatAssemblyEnd_MPIAIJ, 3076 MatSetOption_MPIAIJ, 3077 MatZeroEntries_MPIAIJ, 3078 /*24*/ MatZeroRows_MPIAIJ, 3079 0, 3080 #if defined(PETSC_HAVE_PBGL) 3081 0, 3082 #else 3083 0, 3084 #endif 3085 0, 3086 0, 3087 /*29*/ MatSetUp_MPIAIJ, 3088 #if defined(PETSC_HAVE_PBGL) 3089 0, 3090 #else 3091 0, 3092 #endif 3093 0, 3094 0, 3095 0, 3096 /*34*/ MatDuplicate_MPIAIJ, 3097 0, 3098 0, 3099 0, 3100 0, 3101 /*39*/ MatAXPY_MPIAIJ, 3102 MatGetSubMatrices_MPIAIJ, 3103 MatIncreaseOverlap_MPIAIJ, 3104 MatGetValues_MPIAIJ, 3105 MatCopy_MPIAIJ, 3106 /*44*/ MatGetRowMax_MPIAIJ, 3107 MatScale_MPIAIJ, 3108 0, 3109 0, 3110 MatZeroRowsColumns_MPIAIJ, 3111 /*49*/ MatSetRandom_MPIAIJ, 3112 0, 3113 0, 3114 0, 3115 0, 3116 /*54*/ MatFDColoringCreate_MPIXAIJ, 3117 0, 3118 MatSetUnfactored_MPIAIJ, 3119 MatPermute_MPIAIJ, 3120 0, 3121 /*59*/ MatGetSubMatrix_MPIAIJ, 3122 MatDestroy_MPIAIJ, 3123 MatView_MPIAIJ, 3124 0, 3125 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3126 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3127 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3128 0, 3129 0, 3130 0, 3131 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3132 MatGetRowMinAbs_MPIAIJ, 3133 0, 3134 MatSetColoring_MPIAIJ, 3135 0, 3136 MatSetValuesAdifor_MPIAIJ, 3137 /*75*/ MatFDColoringApply_AIJ, 3138 0, 3139 0, 3140 0, 3141 MatFindZeroDiagonals_MPIAIJ, 3142 /*80*/ 0, 3143 0, 3144 0, 3145 /*83*/ MatLoad_MPIAIJ, 3146 0, 3147 0, 3148 0, 3149 0, 3150 0, 3151 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3152 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3153 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3154 MatPtAP_MPIAIJ_MPIAIJ, 3155 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3156 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3157 0, 3158 0, 3159 0, 3160 0, 3161 /*99*/ 0, 3162 0, 3163 0, 3164 MatConjugate_MPIAIJ, 3165 0, 3166 /*104*/MatSetValuesRow_MPIAIJ, 3167 MatRealPart_MPIAIJ, 3168 MatImaginaryPart_MPIAIJ, 3169 0, 3170 0, 3171 /*109*/0, 3172 MatGetRedundantMatrix_MPIAIJ, 3173 MatGetRowMin_MPIAIJ, 3174 0, 3175 0, 3176 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3177 0, 3178 0, 3179 0, 3180 0, 3181 /*119*/0, 3182 0, 3183 0, 3184 0, 3185 MatGetMultiProcBlock_MPIAIJ, 3186 /*124*/MatFindNonzeroRows_MPIAIJ, 3187 MatGetColumnNorms_MPIAIJ, 3188 MatInvertBlockDiagonal_MPIAIJ, 3189 0, 3190 MatGetSubMatricesParallel_MPIAIJ, 3191 /*129*/0, 3192 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3193 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3194 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3195 0, 3196 /*134*/0, 3197 0, 3198 0, 3199 0, 3200 0, 3201 /*139*/0, 3202 0, 3203 0, 3204 MatFDColoringSetUp_MPIXAIJ 3205 }; 3206 3207 /* ----------------------------------------------------------------------------------------*/ 3208 3209 #undef __FUNCT__ 3210 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3211 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3212 { 3213 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3214 PetscErrorCode ierr; 3215 3216 PetscFunctionBegin; 3217 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3218 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3219 PetscFunctionReturn(0); 3220 } 3221 3222 #undef __FUNCT__ 3223 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3224 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3225 { 3226 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3231 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3232 PetscFunctionReturn(0); 3233 } 3234 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3237 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3238 { 3239 Mat_MPIAIJ *b; 3240 PetscErrorCode ierr; 3241 3242 PetscFunctionBegin; 3243 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3244 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3245 b = (Mat_MPIAIJ*)B->data; 3246 3247 if (!B->preallocated) { 3248 /* Explicitly create 2 MATSEQAIJ matrices. 
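       The first (b->A) holds the diagonal block of the locally owned rows; the second (b->B)
       holds the off-diagonal (ghost-column) block and is created below with the full global
       number of columns, its column space being compacted to the referenced ghost columns
       when the matrix is assembled.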
*/ 3249 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3250 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3251 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3252 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3253 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3254 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3255 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3256 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3257 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3258 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3259 } 3260 3261 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3262 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3263 B->preallocated = PETSC_TRUE; 3264 PetscFunctionReturn(0); 3265 } 3266 3267 #undef __FUNCT__ 3268 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3269 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3270 { 3271 Mat mat; 3272 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3273 PetscErrorCode ierr; 3274 3275 PetscFunctionBegin; 3276 *newmat = 0; 3277 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3278 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3279 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3280 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3281 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3282 a = (Mat_MPIAIJ*)mat->data; 3283 3284 mat->factortype = matin->factortype; 3285 mat->assembled = PETSC_TRUE; 3286 mat->insertmode = NOT_SET_VALUES; 3287 mat->preallocated = PETSC_TRUE; 3288 3289 a->size = oldmat->size; 3290 a->rank = oldmat->rank; 3291 a->donotstash = oldmat->donotstash; 3292 a->roworiented = oldmat->roworiented; 3293 a->rowindices = 0; 3294 a->rowvalues = 0; 3295 a->getrowactive = PETSC_FALSE; 3296 3297 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3298 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3299 3300 if (oldmat->colmap) { 3301 #if defined(PETSC_USE_CTABLE) 3302 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3303 #else 3304 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3305 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3306 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3307 #endif 3308 } else a->colmap = 0; 3309 if (oldmat->garray) { 3310 PetscInt len; 3311 len = oldmat->B->cmap->n; 3312 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3313 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3314 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3315 } else a->garray = 0; 3316 3317 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3319 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3321 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3322 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3323 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3324 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3325 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3326 *newmat = mat; 3327 PetscFunctionReturn(0); 3328 } 3329 3330 3331 3332 #undef __FUNCT__ 3333 #define __FUNCT__ "MatLoad_MPIAIJ" 3334 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3335 { 3336 PetscScalar *vals,*svals; 3337 MPI_Comm comm; 3338 PetscErrorCode ierr; 3339 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3340 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3341 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3342 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3343 PetscInt cend,cstart,n,*rowners,sizesset=1; 3344 int fd; 3345 PetscInt bs = 1; 3346 3347 PetscFunctionBegin; 3348 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3349 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3350 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3351 if (!rank) { 3352 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3353 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3354 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3355 } 3356 3357 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3358 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3359 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3360 3361 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3362 3363 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3364 M = header[1]; N = header[2]; 3365 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3366 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3367 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3368 3369 /* If global sizes are set, check if they are consistent with that given in the file */ 3370 if (sizesset) { 3371 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3372 } 3373 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3374 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3375 3376 /* determine ownership of all (block) rows */ 3377 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3378 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3379 else m = newMat->rmap->n; /* Set by user */ 3380 3381 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3382 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3383 3384 /* First process needs enough room for process with most rows */ 3385 if (!rank) { 3386 mmax = rowners[1]; 3387 for (i=2; i<=size; i++) { 3388 mmax = PetscMax(mmax, rowners[i]); 3389 } 3390 } else mmax = -1; /* unused, but compilers complain */ 3391 3392 rowners[0] = 0; 3393 for (i=2; i<=size; i++) { 3394 rowners[i] += rowners[i-1]; 3395 } 3396 rstart = rowners[rank]; 3397 rend = rowners[rank+1]; 3398 3399 /* distribute row lengths to all processors */ 3400 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3401 if (!rank) { 3402 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3403 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3404 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3405 for (j=0; j<m; j++) { 3406 procsnz[0] += ourlens[j]; 3407 } 3408 for (i=1; i<size; i++) { 3409 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3410 /* calculate the number of nonzeros on each processor */ 3411 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3412 procsnz[i] += rowlengths[j]; 3413 } 3414 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3415 } 3416 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3417 } else { 3418 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3419 } 3420 3421 if (!rank) { 3422 /* determine max buffer needed and allocate it */ 3423 maxnz = 0; 3424 for (i=0; i<size; i++) { 3425 maxnz = PetscMax(maxnz,procsnz[i]); 3426 } 3427 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3428 3429 /* read in my part of the matrix column indices */ 3430 nz = procsnz[0]; 3431 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3432 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3433 3434 /* read in every one elses and ship off */ 3435 for (i=1; i<size; i++) { 3436 nz = procsnz[i]; 3437 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3438 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3439 } 3440 ierr = PetscFree(cols);CHKERRQ(ierr); 3441 } else { 3442 /* determine buffer space needed for message */ 3443 nz = 0; 3444 for (i=0; i<m; i++) { 3445 nz += ourlens[i]; 3446 } 3447 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3448 3449 /* receive message of column indices*/ 3450 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3451 } 3452 3453 /* determine column ownership if matrix is not square */ 3454 if (N != M) { 3455 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3456 else n = newMat->cmap->n; 3457 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3458 cstart = cend - n; 3459 } else { 3460 cstart = rstart; 3461 cend = rend; 3462 n = cend - cstart; 3463 } 3464 3465 /* loop over local rows, determining number of off diagonal entries */ 3466 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3467 jj = 0; 3468 for (i=0; i<m; i++) { 3469 for (j=0; j<ourlens[i]; j++) { 3470 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3471 jj++; 3472 } 3473 } 3474 3475 for (i=0; i<m; i++) { 3476 ourlens[i] -= offlens[i]; 3477 } 3478 if (!sizesset) { 3479 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3480 } 3481 3482 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3483 3484 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3485 3486 for (i=0; i<m; i++) { 3487 ourlens[i] += offlens[i]; 3488 } 3489 3490 if (!rank) { 3491 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3492 3493 /* read in my part of the matrix numerical values */ 3494 nz = procsnz[0]; 3495 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3496 3497 /* insert into matrix */ 3498 jj = rstart; 3499 smycols = mycols; 3500 svals = vals; 3501 for (i=0; i<m; i++) { 3502 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3503 smycols += ourlens[i]; 3504 svals += ourlens[i]; 3505 jj++; 3506 } 3507 3508 /* read in other processors and ship out */ 3509 for (i=1; i<size; i++) { 3510 nz = 
procsnz[i]; 3511 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3512 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3513 } 3514 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3515 } else { 3516 /* receive numeric values */ 3517 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3518 3519 /* receive message of values*/ 3520 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3521 3522 /* insert into matrix */ 3523 jj = rstart; 3524 smycols = mycols; 3525 svals = vals; 3526 for (i=0; i<m; i++) { 3527 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3528 smycols += ourlens[i]; 3529 svals += ourlens[i]; 3530 jj++; 3531 } 3532 } 3533 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3534 ierr = PetscFree(vals);CHKERRQ(ierr); 3535 ierr = PetscFree(mycols);CHKERRQ(ierr); 3536 ierr = PetscFree(rowners);CHKERRQ(ierr); 3537 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3538 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3539 PetscFunctionReturn(0); 3540 } 3541 3542 #undef __FUNCT__ 3543 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3544 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3545 { 3546 PetscErrorCode ierr; 3547 IS iscol_local; 3548 PetscInt csize; 3549 3550 PetscFunctionBegin; 3551 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3552 if (call == MAT_REUSE_MATRIX) { 3553 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3554 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3555 } else { 3556 PetscInt cbs; 3557 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3558 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3559 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3560 } 3561 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3562 if (call == MAT_INITIAL_MATRIX) { 3563 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3564 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3565 } 3566 PetscFunctionReturn(0); 3567 } 3568 3569 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3570 #undef __FUNCT__ 3571 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3572 /* 3573 Not great since it makes two copies of the submatrix, first an SeqAIJ 3574 in local and then by concatenating the local matrices the end result. 3575 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3576 3577 Note: This requires a sequential iscol with all indices. 
3578 */ 3579 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3580 { 3581 PetscErrorCode ierr; 3582 PetscMPIInt rank,size; 3583 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3584 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3585 PetscBool allcolumns, colflag; 3586 Mat M,Mreuse; 3587 MatScalar *vwork,*aa; 3588 MPI_Comm comm; 3589 Mat_SeqAIJ *aij; 3590 3591 PetscFunctionBegin; 3592 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3593 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3594 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3595 3596 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3597 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3598 if (colflag && ncol == mat->cmap->N) { 3599 allcolumns = PETSC_TRUE; 3600 } else { 3601 allcolumns = PETSC_FALSE; 3602 } 3603 if (call == MAT_REUSE_MATRIX) { 3604 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3605 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3606 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3607 } else { 3608 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3609 } 3610 3611 /* 3612 m - number of local rows 3613 n - number of columns (same on all processors) 3614 rstart - first row in new global matrix generated 3615 */ 3616 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3617 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3618 if (call == MAT_INITIAL_MATRIX) { 3619 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3620 ii = aij->i; 3621 jj = aij->j; 3622 3623 /* 3624 Determine the number of non-zeros in the diagonal and off-diagonal 3625 portions of the matrix in order to do correct preallocation 3626 */ 3627 3628 /* first get start and end of "diagonal" columns */ 3629 if (csize == PETSC_DECIDE) { 3630 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3631 if (mglobal == n) { /* square matrix */ 3632 nlocal = m; 3633 } else { 3634 nlocal = n/size + ((n % size) > rank); 3635 } 3636 } else { 3637 nlocal = csize; 3638 } 3639 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3640 rstart = rend - nlocal; 3641 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3642 3643 /* next, compute all the lengths */ 3644 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3645 olens = dlens + m; 3646 for (i=0; i<m; i++) { 3647 jend = ii[i+1] - ii[i]; 3648 olen = 0; 3649 dlen = 0; 3650 for (j=0; j<jend; j++) { 3651 if (*jj < rstart || *jj >= rend) olen++; 3652 else dlen++; 3653 jj++; 3654 } 3655 olens[i] = olen; 3656 dlens[i] = dlen; 3657 } 3658 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3659 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3660 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3661 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3662 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3663 ierr = PetscFree(dlens);CHKERRQ(ierr); 3664 } else { 3665 PetscInt ml,nl; 3666 3667 M = *newmat; 3668 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3669 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3670 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3671 /* 3672 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3758 } 3759 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3760 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 3762 if (!v) { 3763 ierr = PetscFree(values);CHKERRQ(ierr); 3764 } 3765 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3771 /*@ 3772 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3773 (the default parallel PETSc format). 3774 3775 Collective on MPI_Comm 3776 3777 Input Parameters: 3778 + B - the matrix 3779 . i - the indices into j for the start of each local row (starts with zero) 3780 . j - the column indices for each local row (starts with zero) 3781 - v - optional values in the matrix 3782 3783 Level: developer 3784 3785 Notes: 3786 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3787 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3788 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3789 3790 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3791 3792 The format which is used for the sparse matrix input, is equivalent to a 3793 row-major ordering.. i.e for the following matrix, the input data expected is 3794 as shown: 3795 3796 1 0 0 3797 2 0 3 P0 3798 ------- 3799 4 5 6 P1 3800 3801 Process0 [P0]: rows_owned=[0,1] 3802 i = {0,1,3} [size = nrow+1 = 2+1] 3803 j = {0,0,2} [size = nz = 6] 3804 v = {1,2,3} [size = nz = 6] 3805 3806 Process1 [P1]: rows_owned=[2] 3807 i = {0,3} [size = nrow+1 = 1+1] 3808 j = {0,1,2} [size = nz = 6] 3809 v = {4,5,6} [size = nz = 6] 3810 3811 .keywords: matrix, aij, compressed row, sparse, parallel 3812 3813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3814 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3815 @*/ 3816 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3817 { 3818 PetscErrorCode ierr; 3819 3820 PetscFunctionBegin; 3821 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3822 PetscFunctionReturn(0); 3823 } 3824 3825 #undef __FUNCT__ 3826 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3827 /*@C 3828 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3829 (the default parallel PETSc format). For good matrix assembly performance 3830 the user should preallocate the matrix storage by setting the parameters 3831 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3832 performance can be increased by more than a factor of 50. 3833 3834 Collective on MPI_Comm 3835 3836 Input Parameters: 3837 + A - the matrix 3838 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3839 (same value is used for all local rows) 3840 . d_nnz - array containing the number of nonzeros in the various rows of the 3841 DIAGONAL portion of the local submatrix (possibly different for each row) 3842 or NULL, if d_nz is used to specify the nonzero structure. 3843 The size of this array is equal to the number of local rows, i.e 'm'. 
3844 For matrices that will be factored, you must leave room for (and set) 3845 the diagonal entry even if it is zero. 3846 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3847 submatrix (same value is used for all local rows). 3848 - o_nnz - array containing the number of nonzeros in the various rows of the 3849 OFF-DIAGONAL portion of the local submatrix (possibly different for 3850 each row) or NULL, if o_nz is used to specify the nonzero 3851 structure. The size of this array is equal to the number 3852 of local rows, i.e 'm'. 3853 3854 If the *_nnz parameter is given then the *_nz parameter is ignored 3855 3856 The AIJ format (also called the Yale sparse matrix format or 3857 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3858 storage. The stored row and column indices begin with zero. 3859 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3860 3861 The parallel matrix is partitioned such that the first m0 rows belong to 3862 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3863 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3864 3865 The DIAGONAL portion of the local submatrix of a processor can be defined 3866 as the submatrix which is obtained by extraction the part corresponding to 3867 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3868 first row that belongs to the processor, r2 is the last row belonging to 3869 the this processor, and c1-c2 is range of indices of the local part of a 3870 vector suitable for applying the matrix to. This is an mxn matrix. In the 3871 common case of a square matrix, the row and column ranges are the same and 3872 the DIAGONAL part is also square. The remaining portion of the local 3873 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3874 3875 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3876 3877 You can call MatGetInfo() to get information on how effective the preallocation was; 3878 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3879 You can also run with the option -info and look for messages with the string 3880 malloc in them to see if additional memory allocation was needed. 3881 3882 Example usage: 3883 3884 Consider the following 8x8 matrix with 34 non-zero values, that is 3885 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3886 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3887 as follows: 3888 3889 .vb 3890 1 2 0 | 0 3 0 | 0 4 3891 Proc0 0 5 6 | 7 0 0 | 8 0 3892 9 0 10 | 11 0 0 | 12 0 3893 ------------------------------------- 3894 13 0 14 | 15 16 17 | 0 0 3895 Proc1 0 18 0 | 19 20 21 | 0 0 3896 0 0 0 | 22 23 0 | 24 0 3897 ------------------------------------- 3898 Proc2 25 26 27 | 0 0 28 | 29 0 3899 30 0 0 | 31 32 33 | 0 34 3900 .ve 3901 3902 This can be represented as a collection of submatrices as: 3903 3904 .vb 3905 A B C 3906 D E F 3907 G H I 3908 .ve 3909 3910 Where the submatrices A,B,C are owned by proc0, D,E,F are 3911 owned by proc1, G,H,I are owned by proc2. 3912 3913 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3914 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3915 The 'M','N' parameters are 8,8, and have the same values on all procs. 3916 3917 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3918 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3919 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3920 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3921 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3922 matrix, ans [DF] as another SeqAIJ matrix. 3923 3924 When d_nz, o_nz parameters are specified, d_nz storage elements are 3925 allocated for every row of the local diagonal submatrix, and o_nz 3926 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3927 One way to choose d_nz and o_nz is to use the max nonzerors per local 3928 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3929 In this case, the values of d_nz,o_nz are: 3930 .vb 3931 proc0 : dnz = 2, o_nz = 2 3932 proc1 : dnz = 3, o_nz = 2 3933 proc2 : dnz = 1, o_nz = 4 3934 .ve 3935 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3936 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3937 for proc3. i.e we are using 12+15+10=37 storage locations to store 3938 34 values. 3939 3940 When d_nnz, o_nnz parameters are specified, the storage is specified 3941 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3942 In the above case the values for d_nnz,o_nnz are: 3943 .vb 3944 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3945 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3946 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3947 .ve 3948 Here the space allocated is sum of all the above values i.e 34, and 3949 hence pre-allocation is perfect. 3950 3951 Level: intermediate 3952 3953 .keywords: matrix, aij, compressed row, sparse, parallel 3954 3955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3956 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3957 @*/ 3958 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3959 { 3960 PetscErrorCode ierr; 3961 3962 PetscFunctionBegin; 3963 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3964 PetscValidType(B,1); 3965 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3966 PetscFunctionReturn(0); 3967 } 3968 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3971 /*@ 3972 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3973 CSR format the local rows. 3974 3975 Collective on MPI_Comm 3976 3977 Input Parameters: 3978 + comm - MPI communicator 3979 . m - number of local rows (Cannot be PETSC_DECIDE) 3980 . n - This value should be the same as the local size used in creating the 3981 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3982 calculated if N is given) For square matrices n is almost always m. 3983 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3984 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3985 . i - row indices 3986 . j - column indices 3987 - a - matrix values 3988 3989 Output Parameter: 3990 . mat - the matrix 3991 3992 Level: intermediate 3993 3994 Notes: 3995 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3996 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3997 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
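
   For illustration, a minimal calling sketch for the two-process example shown below
   (each process passes only its own local CSR arrays; rank is assumed to have been obtained
   with MPI_Comm_rank() and error checking is omitted):
.vb
   PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
   PetscScalar v0[] = {1,2,3};
   PetscInt    i1[] = {0,3},   j1[] = {0,1,2};
   PetscScalar v1[] = {4,5,6};
   Mat         A;

   if (!rank) MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&A);
   else       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,v1,&A);
.ve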
3998 3999 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4000 4001 The format which is used for the sparse matrix input, is equivalent to a 4002 row-major ordering.. i.e for the following matrix, the input data expected is 4003 as shown: 4004 4005 1 0 0 4006 2 0 3 P0 4007 ------- 4008 4 5 6 P1 4009 4010 Process0 [P0]: rows_owned=[0,1] 4011 i = {0,1,3} [size = nrow+1 = 2+1] 4012 j = {0,0,2} [size = nz = 6] 4013 v = {1,2,3} [size = nz = 6] 4014 4015 Process1 [P1]: rows_owned=[2] 4016 i = {0,3} [size = nrow+1 = 1+1] 4017 j = {0,1,2} [size = nz = 6] 4018 v = {4,5,6} [size = nz = 6] 4019 4020 .keywords: matrix, aij, compressed row, sparse, parallel 4021 4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4023 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4024 @*/ 4025 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4026 { 4027 PetscErrorCode ierr; 4028 4029 PetscFunctionBegin; 4030 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4031 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4032 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4033 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4034 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4035 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4036 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4037 PetscFunctionReturn(0); 4038 } 4039 4040 #undef __FUNCT__ 4041 #define __FUNCT__ "MatCreateAIJ" 4042 /*@C 4043 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4044 (the default parallel PETSc format). For good matrix assembly performance 4045 the user should preallocate the matrix storage by setting the parameters 4046 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4047 performance can be increased by more than a factor of 50. 4048 4049 Collective on MPI_Comm 4050 4051 Input Parameters: 4052 + comm - MPI communicator 4053 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4054 This value should be the same as the local size used in creating the 4055 y vector for the matrix-vector product y = Ax. 4056 . n - This value should be the same as the local size used in creating the 4057 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4058 calculated if N is given) For square matrices n is almost always m. 4059 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4060 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4061 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4062 (same value is used for all local rows) 4063 . d_nnz - array containing the number of nonzeros in the various rows of the 4064 DIAGONAL portion of the local submatrix (possibly different for each row) 4065 or NULL, if d_nz is used to specify the nonzero structure. 4066 The size of this array is equal to the number of local rows, i.e 'm'. 4067 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4068 submatrix (same value is used for all local rows). 
4069 - o_nnz - array containing the number of nonzeros in the various rows of the 4070 OFF-DIAGONAL portion of the local submatrix (possibly different for 4071 each row) or NULL, if o_nz is used to specify the nonzero 4072 structure. The size of this array is equal to the number 4073 of local rows, i.e 'm'. 4074 4075 Output Parameter: 4076 . A - the matrix 4077 4078 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4079 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4080 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4081 4082 Notes: 4083 If the *_nnz parameter is given then the *_nz parameter is ignored 4084 4085 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4086 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4087 storage requirements for this matrix. 4088 4089 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4090 processor than it must be used on all processors that share the object for 4091 that argument. 4092 4093 The user MUST specify either the local or global matrix dimensions 4094 (possibly both). 4095 4096 The parallel matrix is partitioned across processors such that the 4097 first m0 rows belong to process 0, the next m1 rows belong to 4098 process 1, the next m2 rows belong to process 2 etc.. where 4099 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4100 values corresponding to [m x N] submatrix. 4101 4102 The columns are logically partitioned with the n0 columns belonging 4103 to 0th partition, the next n1 columns belonging to the next 4104 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4105 4106 The DIAGONAL portion of the local submatrix on any given processor 4107 is the submatrix corresponding to the rows and columns m,n 4108 corresponding to the given processor. i.e diagonal matrix on 4109 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4110 etc. The remaining portion of the local submatrix [m x (N-n)] 4111 constitute the OFF-DIAGONAL portion. The example below better 4112 illustrates this concept. 4113 4114 For a square global matrix we define each processor's diagonal portion 4115 to be its local rows and the corresponding columns (a square submatrix); 4116 each processor's off-diagonal portion encompasses the remainder of the 4117 local matrix (a rectangular submatrix). 4118 4119 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4120 4121 When calling this routine with a single process communicator, a matrix of 4122 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4123 type of communicator, use the construction mechanism: 4124 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4125 4126 By default, this format uses inodes (identical nodes) when possible. 4127 We search for consecutive rows with the same nonzero structure, thereby 4128 reusing matrix information to achieve increased efficiency. 4129 4130 Options Database Keys: 4131 + -mat_no_inode - Do not use inodes 4132 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4133 - -mat_aij_oneindex - Internally use indexing starting at 1 4134 rather than 0. Note that when calling MatSetValues(), 4135 the user still MUST index entries starting at 0! 4136 4137 4138 Example usage: 4139 4140 Consider the following 8x8 matrix with 34 non-zero values, that is 4141 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4142 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4143 as follows: 4144 4145 .vb 4146 1 2 0 | 0 3 0 | 0 4 4147 Proc0 0 5 6 | 7 0 0 | 8 0 4148 9 0 10 | 11 0 0 | 12 0 4149 ------------------------------------- 4150 13 0 14 | 15 16 17 | 0 0 4151 Proc1 0 18 0 | 19 20 21 | 0 0 4152 0 0 0 | 22 23 0 | 24 0 4153 ------------------------------------- 4154 Proc2 25 26 27 | 0 0 28 | 29 0 4155 30 0 0 | 31 32 33 | 0 34 4156 .ve 4157 4158 This can be represented as a collection of submatrices as: 4159 4160 .vb 4161 A B C 4162 D E F 4163 G H I 4164 .ve 4165 4166 Where the submatrices A,B,C are owned by proc0, D,E,F are 4167 owned by proc1, G,H,I are owned by proc2. 4168 4169 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4170 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4171 The 'M','N' parameters are 8,8, and have the same values on all procs. 4172 4173 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4174 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4175 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4176 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4177 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4178 matrix, ans [DF] as another SeqAIJ matrix. 4179 4180 When d_nz, o_nz parameters are specified, d_nz storage elements are 4181 allocated for every row of the local diagonal submatrix, and o_nz 4182 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4183 One way to choose d_nz and o_nz is to use the max nonzerors per local 4184 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4185 In this case, the values of d_nz,o_nz are: 4186 .vb 4187 proc0 : dnz = 2, o_nz = 2 4188 proc1 : dnz = 3, o_nz = 2 4189 proc2 : dnz = 1, o_nz = 4 4190 .ve 4191 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4192 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4193 for proc3. i.e we are using 12+15+10=37 storage locations to store 4194 34 values. 4195 4196 When d_nnz, o_nnz parameters are specified, the storage is specified 4197 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4198 In the above case the values for d_nnz,o_nnz are: 4199 .vb 4200 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4201 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4202 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4203 .ve 4204 Here the space allocated is sum of all the above values i.e 34, and 4205 hence pre-allocation is perfect. 
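
   As noted above, the recommended way to obtain the same matrix is through the
   MatCreate(), MatSetType(), MatXXXXSetPreallocation() paradigm; a minimal sketch
   (with m,n,M,N and the d_nnz,o_nnz arrays as in the example above, error checking omitted):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,M,N);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve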
4206 4207 Level: intermediate 4208 4209 .keywords: matrix, aij, compressed row, sparse, parallel 4210 4211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4212 MPIAIJ, MatCreateMPIAIJWithArrays() 4213 @*/ 4214 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4215 { 4216 PetscErrorCode ierr; 4217 PetscMPIInt size; 4218 4219 PetscFunctionBegin; 4220 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4221 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4222 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4223 if (size > 1) { 4224 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4225 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4226 } else { 4227 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4228 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4229 } 4230 PetscFunctionReturn(0); 4231 } 4232 4233 #undef __FUNCT__ 4234 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4235 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4236 { 4237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4238 4239 PetscFunctionBegin; 4240 *Ad = a->A; 4241 *Ao = a->B; 4242 *colmap = a->garray; 4243 PetscFunctionReturn(0); 4244 } 4245 4246 #undef __FUNCT__ 4247 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4248 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4249 { 4250 PetscErrorCode ierr; 4251 PetscInt i; 4252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4253 4254 PetscFunctionBegin; 4255 if (coloring->ctype == IS_COLORING_GLOBAL) { 4256 ISColoringValue *allcolors,*colors; 4257 ISColoring ocoloring; 4258 4259 /* set coloring for diagonal portion */ 4260 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4261 4262 /* set coloring for off-diagonal portion */ 4263 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4264 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4265 for (i=0; i<a->B->cmap->n; i++) { 4266 colors[i] = allcolors[a->garray[i]]; 4267 } 4268 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4269 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4270 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4271 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4272 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4273 ISColoringValue *colors; 4274 PetscInt *larray; 4275 ISColoring ocoloring; 4276 4277 /* set coloring for diagonal portion */ 4278 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4279 for (i=0; i<a->A->cmap->n; i++) { 4280 larray[i] = i + A->cmap->rstart; 4281 } 4282 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4283 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4284 for (i=0; i<a->A->cmap->n; i++) { 4285 colors[i] = coloring->colors[larray[i]]; 4286 } 4287 ierr = PetscFree(larray);CHKERRQ(ierr); 4288 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4289 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4290 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4291 4292 /* set coloring for off-diagonal portion */ 4293 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4294 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4295 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4296 for (i=0; i<a->B->cmap->n; i++) { 4297 colors[i] = coloring->colors[larray[i]]; 4298 } 4299 ierr = PetscFree(larray);CHKERRQ(ierr); 4300 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4301 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4302 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4303 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4304 PetscFunctionReturn(0); 4305 } 4306 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4309 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4310 { 4311 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4312 PetscErrorCode ierr; 4313 4314 PetscFunctionBegin; 4315 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4316 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4317 PetscFunctionReturn(0); 4318 } 4319 4320 #undef __FUNCT__ 4321 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4322 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4323 { 4324 PetscErrorCode ierr; 4325 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4326 PetscInt *indx; 4327 4328 PetscFunctionBegin; 4329 /* This routine will ONLY return MPIAIJ type matrix */ 4330 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4331 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4332 if (n == PETSC_DECIDE) { 4333 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4334 } 4335 /* Check sum(n) = N */ 4336 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4337 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4338 4339 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4340 rstart -= m; 4341 4342 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4343 for (i=0; i<m; i++) { 4344 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4345 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4346 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4347 } 4348 4349 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4350 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4351 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4352 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4353 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4354 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4360 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4361 { 4362 PetscErrorCode ierr; 4363 PetscInt m,N,i,rstart,nnz,Ii; 4364 PetscInt *indx; 4365 PetscScalar *values; 4366 4367 PetscFunctionBegin; 4368 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4369 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4370 for (i=0; i<m; i++) { 4371 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4372 Ii = i + rstart; 4373 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4374 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4375 } 4376 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4377 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4378 PetscFunctionReturn(0); 4379 } 4380 4381 #undef __FUNCT__ 4382 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4383 /*@ 4384 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4385 matrices from each processor 4386 4387 Collective on MPI_Comm 4388 4389 Input Parameters: 4390 + comm - the communicators the parallel matrix will live on 4391 . inmat - the input sequential matrices 4392 . n - number of local columns (or PETSC_DECIDE) 4393 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4394 4395 Output Parameter: 4396 . outmat - the parallel matrix generated 4397 4398 Level: advanced 4399 4400 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4401 4402 @*/ 4403 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4404 { 4405 PetscErrorCode ierr; 4406 PetscMPIInt size; 4407 4408 PetscFunctionBegin; 4409 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4410 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4411 if (size == 1) { 4412 if (scall == MAT_INITIAL_MATRIX) { 4413 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4414 } else { 4415 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4416 } 4417 } else { 4418 if (scall == MAT_INITIAL_MATRIX) { 4419 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4420 } 4421 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4422 } 4423 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4424 PetscFunctionReturn(0); 4425 } 4426 4427 #undef __FUNCT__ 4428 #define __FUNCT__ "MatFileSplit" 4429 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4430 { 4431 PetscErrorCode ierr; 4432 PetscMPIInt rank; 4433 PetscInt m,N,i,rstart,nnz; 4434 size_t len; 4435 const PetscInt *indx; 4436 PetscViewer out; 4437 char *name; 4438 Mat B; 4439 const PetscScalar *values; 4440 4441 PetscFunctionBegin; 4442 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4443 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4444 /* Should this be the type of the diagonal block of A? 
*/ 4445 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4446 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4447 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4448 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4449 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4450 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4451 for (i=0; i<m; i++) { 4452 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4453 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4454 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4455 } 4456 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4457 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4458 4459 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4460 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4461 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4462 sprintf(name,"%s.%d",outfile,rank); 4463 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4464 ierr = PetscFree(name);CHKERRQ(ierr); 4465 ierr = MatView(B,out);CHKERRQ(ierr); 4466 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4467 ierr = MatDestroy(&B);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4474 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4475 { 4476 PetscErrorCode ierr; 4477 Mat_Merge_SeqsToMPI *merge; 4478 PetscContainer container; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4482 if (container) { 4483 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4496 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4497 ierr = PetscFree(merge);CHKERRQ(ierr); 4498 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4499 } 4500 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4501 PetscFunctionReturn(0); 4502 } 4503 4504 #include <../src/mat/utils/freespace.h> 4505 #include <petscbt.h> 4506 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4509 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4510 { 4511 PetscErrorCode ierr; 4512 MPI_Comm comm; 4513 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4514 PetscMPIInt size,rank,taga,*len_s; 4515 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4516 PetscInt proc,m; 4517 PetscInt **buf_ri,**buf_rj; 4518 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4519 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4520 MPI_Request *s_waits,*r_waits; 4521 MPI_Status *status; 4522 MatScalar *aa=a->a; 4523 MatScalar **abuf_r,*ba_i; 4524 Mat_Merge_SeqsToMPI *merge; 4525 PetscContainer container; 4526 4527 PetscFunctionBegin; 4528 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4529 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4530 4531 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4532 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4533 4534 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4535 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4536 4537 bi = merge->bi; 4538 bj = merge->bj; 4539 buf_ri = merge->buf_ri; 4540 buf_rj = merge->buf_rj; 4541 4542 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4543 owners = merge->rowmap->range; 4544 len_s = merge->len_s; 4545 4546 /* send and recv matrix values */ 4547 /*-----------------------------*/ 4548 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4549 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4550 4551 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4552 for (proc=0,k=0; proc<size; proc++) { 4553 if (!len_s[proc]) continue; 4554 i = owners[proc]; 4555 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4556 k++; 4557 } 4558 4559 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4560 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4561 ierr = PetscFree(status);CHKERRQ(ierr); 4562 4563 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4564 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4565 4566 /* insert mat values of mpimat */ 4567 /*----------------------------*/ 4568 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4569 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4570 4571 for (k=0; k<merge->nrecv; k++) { 4572 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4573 nrows = *(buf_ri_k[k]); 4574 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4575 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4576 } 4577 4578 /* set values of ba */ 4579 m = merge->rowmap->n; 4580 for (i=0; i<m; i++) { 4581 arow = owners[rank] + i; 4582 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4583 bnzi = bi[i+1] - bi[i]; 4584 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4585 4586 /* add local non-zero vals of this proc's seqmat into ba */ 4587 anzi = ai[arow+1] - ai[arow]; 4588 aj = a->j + ai[arow]; 4589 aa = a->a + ai[arow]; 4590 nextaj = 0; 4591 for (j=0; nextaj<anzi; j++) { 4592 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4593 ba_i[j] += aa[nextaj++]; 4594 } 4595 } 4596 4597 /* add received vals into ba */ 4598 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4599 /* i-th row */ 4600 if (i == *nextrow[k]) { 4601 anzi = *(nextai[k]+1) - *nextai[k]; 4602 aj = buf_rj[k] + *(nextai[k]); 4603 aa = abuf_r[k] + *(nextai[k]); 4604 nextaj = 0; 4605 for (j=0; nextaj<anzi; j++) { 4606 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4607 ba_i[j] += aa[nextaj++]; 4608 } 4609 } 4610 nextrow[k]++; nextai[k]++; 4611 } 4612 } 4613 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4614 } 4615 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4616 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4617 4618 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4619 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4620 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4621 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4622 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4623 PetscFunctionReturn(0); 4624 } 4625 4626 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4627 4628 #undef __FUNCT__ 4629 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4630 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4631 { 4632 PetscErrorCode ierr; 4633 Mat B_mpi; 4634 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4635 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4636 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4637 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4638 PetscInt len,proc,*dnz,*onz,bs,cbs; 4639 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4640 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4641 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4642 MPI_Status *status; 4643 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4644 PetscBT lnkbt; 4645 Mat_Merge_SeqsToMPI *merge; 4646 PetscContainer container; 4647 4648 PetscFunctionBegin; 4649 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4650 4651 /* make sure it is a PETSc comm */ 4652 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4655 4656 ierr = PetscNew(&merge);CHKERRQ(ierr); 4657 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4658 4659 /* determine row ownership */ 4660 /*---------------------------------------------------------*/ 4661 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4662 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4668 4669 m = merge->rowmap->n; 4670 owners = merge->rowmap->range; 4671 4672 /* determine the number of messages to send, their lengths */ 4673 /*---------------------------------------------------------*/ 4674 len_s = merge->len_s; 4675 4676 len = 0; /* length of buf_si[] */ 4677 merge->nsend = 0; 4678 for (proc=0; proc<size; proc++) { 4679 len_si[proc] = 0; 4680 if (proc == rank) { 4681 len_s[proc] = 0; 4682 } else { 4683 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4684 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4685 } 4686 if (len_s[proc]) { 4687 merge->nsend++; 4688 nrows = 0; 4689 for (i=owners[proc]; i<owners[proc+1]; i++) { 4690 if (ai[i+1] > ai[i]) nrows++; 4691 } 4692 len_si[proc] = 2*(nrows+1); 4693 len += len_si[proc]; 4694 } 4695 } 4696 4697 /* determine the number and length of messages to receive for ij-structure */ 4698 /*-------------------------------------------------------------------------*/ 4699 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4700 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4701 4702 /* post the Irecv of j-structure */ 4703 /*-------------------------------*/ 4704 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4705 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4706 4707 /* post the Isend of j-structure */ 4708 /*--------------------------------*/ 4709 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4710 4711 for (proc=0, k=0; proc<size; proc++) { 4712 if (!len_s[proc]) continue; 4713 i = owners[proc]; 4714 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4715 k++; 4716 } 4717 4718 /* receives and sends of j-structure are complete */ 4719 /*------------------------------------------------*/ 4720 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4721 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4722 4723 /* send and recv i-structure */ 4724 /*---------------------------*/ 4725 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4726 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4727 4728 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4729 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4730 for (proc=0,k=0; proc<size; proc++) { 4731 if (!len_s[proc]) continue; 4732 /* form outgoing message for i-structure: 4733 buf_si[0]: nrows to be sent 4734 [1:nrows]: row index (global) 4735 [nrows+1:2*nrows+1]: i-structure index 4736 */ 4737 /*-------------------------------------------*/ 4738 nrows = len_si[proc]/2 - 1; 4739 buf_si_i = buf_si + nrows+1; 4740 buf_si[0] = nrows; 4741 buf_si_i[0] = 0; 4742 nrows = 0; 4743 for (i=owners[proc]; i<owners[proc+1]; i++) { 4744 anzi = ai[i+1] - ai[i]; 4745 if (anzi) { 4746 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4747 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4748 nrows++; 4749 } 4750 } 4751 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4752 k++; 4753 buf_si += len_si[proc]; 4754 } 4755 4756 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4757 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4758 4759 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4760 for (i=0; i<merge->nrecv; i++) { 4761 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4762 } 4763 4764 ierr = PetscFree(len_si);CHKERRQ(ierr); 4765 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4766 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4767 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4768 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4769 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4770 ierr = PetscFree(status);CHKERRQ(ierr); 4771 4772 /* compute a local seq matrix in each processor */ 4773 /*----------------------------------------------*/ 4774 /* allocate bi array and free space for accumulating nonzero column info */ 4775 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4776 bi[0] = 0; 4777 4778 /* create and initialize a linked list */ 4779 nlnk = N+1; 4780 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4781 4782 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4783 len = ai[owners[rank+1]] - ai[owners[rank]]; 4784 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4785 4786 current_space = free_space; 4787 4788 /* determine symbolic info for each local row */ 4789 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4790 4791 for (k=0; k<merge->nrecv; k++) { 4792 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4793 nrows = *buf_ri_k[k]; 4794 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4795 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4796 } 4797 4798 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4799 len = 0; 4800 for (i=0; i<m; i++) { 4801 bnzi = 0; 4802 /* add local non-zero cols of this proc's seqmat into lnk */ 4803 arow = owners[rank] + i; 4804 anzi = ai[arow+1] - ai[arow]; 4805 aj = a->j + ai[arow]; 4806 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4807 bnzi += nlnk; 4808 /* add received col data into lnk */ 4809 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4810 if (i == *nextrow[k]) { /* i-th row */ 4811 anzi = *(nextai[k]+1) - *nextai[k]; 4812 aj = buf_rj[k] + *nextai[k]; 4813 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4814 bnzi += nlnk; 4815 nextrow[k]++; nextai[k]++; 4816 } 4817 } 4818 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4819 4820 /* if free space is not available, make more free space */ 4821 if (current_space->local_remaining<bnzi) { 4822 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4823 nspacedouble++; 4824 } 4825 /* copy data into free space, then initialize lnk */ 4826 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4827 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4828 4829 current_space->array += bnzi; 4830 current_space->local_used += bnzi; 4831 current_space->local_remaining -= bnzi; 4832 4833 bi[i+1] = bi[i] + bnzi; 4834 } 4835 4836 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4837 4838 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4839 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4840 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4841 4842 /* create symbolic parallel matrix B_mpi */ 4843 /*---------------------------------------*/ 4844 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4845 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4846 if (n==PETSC_DECIDE) { 4847 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4848 } else { 4849 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4850 } 4851 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4852 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4853 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4854 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4855 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4856 4857 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4858 B_mpi->assembled = PETSC_FALSE; 4859 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4860 merge->bi = bi; 4861 merge->bj = bj; 4862 merge->buf_ri = buf_ri; 4863 merge->buf_rj = buf_rj; 4864 merge->coi = NULL; 4865 merge->coj = NULL; 4866 merge->owners_co = NULL; 4867 4868 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4869 4870 /* attach the supporting struct to B_mpi for reuse */ 4871 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4872 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4873 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4874 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4875 *mpimat = B_mpi; 4876 4877 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4878 PetscFunctionReturn(0); 4879 } 4880 4881 #undef __FUNCT__ 4882 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4883 /*@C 4884 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential 4885 matrices from each processor 4886 4887 Collective on MPI_Comm 4888 4889 Input Parameters: 4890 + comm - the communicator the parallel matrix will live on 4891 . seqmat - the input sequential matrix (one per process) 4892 . m - number of local rows (or PETSC_DECIDE) 4893 . n - number of local columns (or PETSC_DECIDE) 4894 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4895 4896 Output Parameter: 4897 . mpimat - the parallel matrix generated 4898 4899 Level: advanced 4900 4901 Notes: 4902 The dimensions of the sequential matrix MUST be the same on each process. 4903 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4904 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4905 @*/ 4906 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4907 { 4908 PetscErrorCode ierr; 4909 PetscMPIInt size; 4910 4911 PetscFunctionBegin; 4912 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4913 if (size == 1) { 4914 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4915 if (scall == MAT_INITIAL_MATRIX) { 4916 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4917 } else { 4918 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4919 } 4920 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4921 PetscFunctionReturn(0); 4922 } 4923 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4924 if (scall == MAT_INITIAL_MATRIX) { 4925 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4926 } 4927 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4928 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4929 PetscFunctionReturn(0); 4930 } 4931 4932 #undef __FUNCT__ 4933 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4934 /*@ 4935 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4936 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 4937 with MatGetSize() 4938 4939 Not Collective 4940 4941 Input Parameters: 4942 + A - the matrix 4943 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4944 4945 Output Parameter: 4946 . 
A_loc - the local sequential matrix generated 4947 4948 Level: developer 4949 4950 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4951 4952 @*/ 4953 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4954 { 4955 PetscErrorCode ierr; 4956 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4957 Mat_SeqAIJ *mat,*a,*b; 4958 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4959 MatScalar *aa,*ba,*cam; 4960 PetscScalar *ca; 4961 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4962 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4963 PetscBool match; 4964 4965 PetscFunctionBegin; 4966 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4967 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4968 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4969 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4970 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4971 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4972 aa = a->a; ba = b->a; 4973 if (scall == MAT_INITIAL_MATRIX) { 4974 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4975 ci[0] = 0; 4976 for (i=0; i<am; i++) { 4977 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4978 } 4979 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4980 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4981 k = 0; 4982 for (i=0; i<am; i++) { 4983 ncols_o = bi[i+1] - bi[i]; 4984 ncols_d = ai[i+1] - ai[i]; 4985 /* off-diagonal portion of A */ 4986 for (jo=0; jo<ncols_o; jo++) { 4987 col = cmap[*bj]; 4988 if (col >= cstart) break; 4989 cj[k] = col; bj++; 4990 ca[k++] = *ba++; 4991 } 4992 /* diagonal portion of A */ 4993 for (j=0; j<ncols_d; j++) { 4994 cj[k] = cstart + *aj++; 4995 ca[k++] = *aa++; 4996 } 4997 /* off-diagonal portion of A */ 4998 for (j=jo; j<ncols_o; j++) { 4999 cj[k] = cmap[*bj++]; 5000 ca[k++] = *ba++; 5001 } 5002 } 5003 /* put together the new matrix */ 5004 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5005 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5006 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5007 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5008 mat->free_a = PETSC_TRUE; 5009 mat->free_ij = PETSC_TRUE; 5010 mat->nonew = 0; 5011 } else if (scall == MAT_REUSE_MATRIX) { 5012 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5013 ci = mat->i; cj = mat->j; cam = mat->a; 5014 for (i=0; i<am; i++) { 5015 /* off-diagonal portion of A */ 5016 ncols_o = bi[i+1] - bi[i]; 5017 for (jo=0; jo<ncols_o; jo++) { 5018 col = cmap[*bj]; 5019 if (col >= cstart) break; 5020 *cam++ = *ba++; bj++; 5021 } 5022 /* diagonal portion of A */ 5023 ncols_d = ai[i+1] - ai[i]; 5024 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5025 /* off-diagonal portion of A */ 5026 for (j=jo; j<ncols_o; j++) { 5027 *cam++ = *ba++; bj++; 5028 } 5029 } 5030 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5031 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 #undef __FUNCT__ 5036 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5037 /*@C 5038 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5039 5040 Not Collective 5041 5042 Input Parameters: 5043 + A - the matrix 5044 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5045 - row, col - index sets of rows and columns to extract (or NULL) 5046 5047 Output Parameter: 5048 . 
A_loc - the local sequential matrix generated 5049 5050 Level: developer 5051 5052 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5053 5054 @*/ 5055 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5056 { 5057 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5058 PetscErrorCode ierr; 5059 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5060 IS isrowa,iscola; 5061 Mat *aloc; 5062 PetscBool match; 5063 5064 PetscFunctionBegin; 5065 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5066 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5067 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5068 if (!row) { 5069 start = A->rmap->rstart; end = A->rmap->rend; 5070 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5071 } else { 5072 isrowa = *row; 5073 } 5074 if (!col) { 5075 start = A->cmap->rstart; 5076 cmap = a->garray; 5077 nzA = a->A->cmap->n; 5078 nzB = a->B->cmap->n; 5079 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5080 ncols = 0; 5081 for (i=0; i<nzB; i++) { 5082 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5083 else break; 5084 } 5085 imark = i; 5086 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5087 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5088 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5089 } else { 5090 iscola = *col; 5091 } 5092 if (scall != MAT_INITIAL_MATRIX) { 5093 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5094 aloc[0] = *A_loc; 5095 } 5096 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5097 *A_loc = aloc[0]; 5098 ierr = PetscFree(aloc);CHKERRQ(ierr); 5099 if (!row) { 5100 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5101 } 5102 if (!col) { 5103 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5104 } 5105 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5106 PetscFunctionReturn(0); 5107 } 5108 5109 #undef __FUNCT__ 5110 #define __FUNCT__ "MatGetBrowsOfAcols" 5111 /*@C 5112 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 5113 5114 Collective on Mat 5115 5116 Input Parameters: 5117 + A,B - the matrices in mpiaij format 5118 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5119 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5120 5121 Output Parameter: 5122 + rowb, colb - index sets of rows and columns of B to extract 5123 - B_seq - the sequential matrix generated 5124 5125 Level: developer 5126 5127 @*/ 5128 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5129 { 5130 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5131 PetscErrorCode ierr; 5132 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5133 IS isrowb,iscolb; 5134 Mat *bseq=NULL; 5135 5136 PetscFunctionBegin; 5137 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5138 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5139 } 5140 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5141 5142 if (scall == MAT_INITIAL_MATRIX) { 5143 start = A->cmap->rstart; 5144 cmap = a->garray; 5145 nzA = a->A->cmap->n; 5146 nzB = a->B->cmap->n; 5147 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5148 ncols = 0; 5149 for (i=0; i<nzB; i++) { /* row < local row index */ 5150 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5151 else break; 5152 } 5153 imark = i; 5154 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5155 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5156 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5157 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5158 } else { 5159 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5160 isrowb = *rowb; iscolb = *colb; 5161 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5162 bseq[0] = *B_seq; 5163 } 5164 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5165 *B_seq = bseq[0]; 5166 ierr = PetscFree(bseq);CHKERRQ(ierr); 5167 if (!rowb) { 5168 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5169 } else { 5170 *rowb = isrowb; 5171 } 5172 if (!colb) { 5173 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5174 } else { 5175 *colb = iscolb; 5176 } 5177 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5178 PetscFunctionReturn(0); 5179 } 5180 5181 #undef __FUNCT__ 5182 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5183 /* 5184 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5185 of the OFF-DIAGONAL portion of the local part of A 5186 5187 Collective on Mat 5188 5189 Input Parameters: 5190 + A,B - the matrices in mpiaij format 5191 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5192 5193 Output Parameter: 5194 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5195 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5196 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5197 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5198 5199 Level: developer 5200 5201 */ 5202 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5203 { 5204 VecScatter_MPI_General *gen_to,*gen_from; 5205 PetscErrorCode ierr; 5206 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5207 Mat_SeqAIJ *b_oth; 5208 VecScatter ctx =a->Mvctx; 5209 MPI_Comm comm; 5210 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5211 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5212 PetscScalar *rvalues,*svalues; 5213 MatScalar *b_otha,*bufa,*bufA; 5214 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5215 MPI_Request *rwaits = NULL,*swaits = NULL; 5216 MPI_Status *sstatus,rstatus; 5217 PetscMPIInt jj; 5218 PetscInt *cols,sbs,rbs; 5219 PetscScalar *vals; 5220 5221 PetscFunctionBegin; 5222 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5223 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5224 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5225 } 5226 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5227 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5228 5229 gen_to = (VecScatter_MPI_General*)ctx->todata; 5230 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5231 rvalues = gen_from->values; /* holds the length of receiving row */ 5232 svalues = gen_to->values; /* holds the length of sending row */ 5233 nrecvs = gen_from->n; 5234 nsends = gen_to->n; 5235 5236 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5237 srow = gen_to->indices; /* local row index to be sent */ 5238 sstarts = gen_to->starts; 5239 sprocs = gen_to->procs; 5240 sstatus = gen_to->sstatus; 5241 sbs = gen_to->bs; 5242 rstarts = gen_from->starts; 5243 rprocs = gen_from->procs; 5244 rbs = gen_from->bs; 5245 5246 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5247 if (scall == MAT_INITIAL_MATRIX) { 5248 /* i-array */ 5249 /*---------*/ 5250 /* post receives */ 5251 for (i=0; i<nrecvs; i++) { 5252 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5253 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5254 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5255 } 5256 5257 /* pack the outgoing message */ 5258 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5259 5260 sstartsj[0] = 0; 5261 rstartsj[0] = 0; 5262 len = 0; /* total length of j or a array to be sent */ 5263 k = 0; 5264 for (i=0; i<nsends; i++) { 5265 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5266 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5267 for (j=0; j<nrows; j++) { 5268 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5269 for (l=0; l<sbs; l++) { 5270 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5271 5272 rowlen[j*sbs+l] = ncols; 5273 5274 len += ncols; 5275 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5276 } 5277 k++; 5278 } 5279 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5280 5281 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5282 } 5283 /* recvs and 
sends of i-array are completed */ 5284 i = nrecvs; 5285 while (i--) { 5286 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5287 } 5288 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5289 5290 /* allocate buffers for sending j and a arrays */ 5291 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5292 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5293 5294 /* create i-array of B_oth */ 5295 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5296 5297 b_othi[0] = 0; 5298 len = 0; /* total length of j or a array to be received */ 5299 k = 0; 5300 for (i=0; i<nrecvs; i++) { 5301 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5302 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5303 for (j=0; j<nrows; j++) { 5304 b_othi[k+1] = b_othi[k] + rowlen[j]; 5305 len += rowlen[j]; k++; 5306 } 5307 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5308 } 5309 5310 /* allocate space for j and a arrays of B_oth */ 5311 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5312 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5313 5314 /* j-array */ 5315 /*---------*/ 5316 /* post receives of j-array */ 5317 for (i=0; i<nrecvs; i++) { 5318 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5319 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5320 } 5321 5322 /* pack the outgoing message j-array */ 5323 k = 0; 5324 for (i=0; i<nsends; i++) { 5325 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5326 bufJ = bufj+sstartsj[i]; 5327 for (j=0; j<nrows; j++) { 5328 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5329 for (ll=0; ll<sbs; ll++) { 5330 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5331 for (l=0; l<ncols; l++) { 5332 *bufJ++ = cols[l]; 5333 } 5334 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5335 } 5336 } 5337 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5338 } 5339 5340 /* recvs and sends of j-array are completed */ 5341 i = nrecvs; 5342 while (i--) { 5343 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5344 } 5345 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5346 } else if (scall == MAT_REUSE_MATRIX) { 5347 sstartsj = *startsj_s; 5348 rstartsj = *startsj_r; 5349 bufa = *bufa_ptr; 5350 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5351 b_otha = b_oth->a; 5352 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse option"); 5353 5354 /* a-array */ 5355 /*---------*/ 5356 /* post receives of a-array */ 5357 for (i=0; i<nrecvs; i++) { 5358 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5359 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5360 } 5361 5362 /* pack the outgoing message a-array */ 5363 k = 0; 5364 for (i=0; i<nsends; i++) { 5365 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5366 bufA = bufa+sstartsj[i]; 5367 for (j=0; j<nrows; j++) { 5368 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5369 for (ll=0; ll<sbs; ll++) { 5370 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5371 for (l=0; l<ncols; l++) { 5372 *bufA++ = vals[l]; 5373 } 5374 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5375 } 5376 } 5377 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5378 } 5379 /* recvs and sends of a-array are completed */ 5380 i = nrecvs; 5381 while (i--) { 5382 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5383 } 5384 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5385 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5386 5387 if (scall == MAT_INITIAL_MATRIX) { 5388 /* put together the new matrix */ 5389 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5390 5391 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5392 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5393 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5394 b_oth->free_a = PETSC_TRUE; 5395 b_oth->free_ij = PETSC_TRUE; 5396 b_oth->nonew = 0; 5397 5398 ierr = PetscFree(bufj);CHKERRQ(ierr); 5399 if (!startsj_s || !bufa_ptr) { 5400 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5401 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5402 } else { 5403 *startsj_s = sstartsj; 5404 *startsj_r = rstartsj; 5405 *bufa_ptr = bufa; 5406 } 5407 } 5408 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5409 PetscFunctionReturn(0); 5410 } 5411 5412 #undef __FUNCT__ 5413 #define __FUNCT__ "MatGetCommunicationStructs" 5414 /*@C 5415 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5416 5417 Not Collective 5418 5419 Input Parameters: 5420 . A - The matrix in mpiaij format 5421 5422 Output Parameter: 5423 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5424 . colmap - A map from global column index to local index into lvec 5425 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5426 5427 Level: developer 5428 5429 @*/ 5430 #if defined(PETSC_USE_CTABLE) 5431 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5432 #else 5433 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5434 #endif 5435 { 5436 Mat_MPIAIJ *a; 5437 5438 PetscFunctionBegin; 5439 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5440 PetscValidPointer(lvec, 2); 5441 PetscValidPointer(colmap, 3); 5442 PetscValidPointer(multScatter, 4); 5443 a = (Mat_MPIAIJ*) A->data; 5444 if (lvec) *lvec = a->lvec; 5445 if (colmap) *colmap = a->colmap; 5446 if (multScatter) *multScatter = a->Mvctx; 5447 PetscFunctionReturn(0); 5448 } 5449 5450 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5452 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5453 5454 #undef __FUNCT__ 5455 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5456 /* 5457 Computes (B'*A')' since computing B*A directly is untenable 5458 5459 n p p 5460 ( ) ( ) ( ) 5461 m ( A ) * n ( B ) = m ( C ) 5462 ( ) ( ) ( ) 5463 5464 */ 5465 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5466 { 5467 PetscErrorCode ierr; 5468 Mat At,Bt,Ct; 5469 5470 PetscFunctionBegin; 5471 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5472 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5473 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5474 ierr = MatDestroy(&At);CHKERRQ(ierr); 
5475 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5476 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5477 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5478 PetscFunctionReturn(0); 5479 } 5480 5481 #undef __FUNCT__ 5482 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5483 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5484 { 5485 PetscErrorCode ierr; 5486 PetscInt m=A->rmap->n,n=B->cmap->n; 5487 Mat Cmat; 5488 5489 PetscFunctionBegin; 5490 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5491 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5492 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5493 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5494 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5495 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5496 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5497 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5498 5499 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5500 5501 *C = Cmat; 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /* ----------------------------------------------------------------*/ 5506 #undef __FUNCT__ 5507 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5508 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5509 { 5510 PetscErrorCode ierr; 5511 5512 PetscFunctionBegin; 5513 if (scall == MAT_INITIAL_MATRIX) { 5514 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5515 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5516 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5517 } 5518 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5519 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5520 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5521 PetscFunctionReturn(0); 5522 } 5523 5524 #if defined(PETSC_HAVE_MUMPS) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_PASTIX) 5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5529 #endif 5530 #if defined(PETSC_HAVE_SUPERLU_DIST) 5531 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5532 #endif 5533 #if defined(PETSC_HAVE_CLIQUE) 5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5535 #endif 5536 5537 /*MC 5538 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5539 5540 Options Database Keys: 5541 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5542 5543 Level: beginner 5544 5545 .seealso: MatCreateAIJ() 5546 M*/ 5547 5548 #undef __FUNCT__ 5549 #define __FUNCT__ "MatCreate_MPIAIJ" 5550 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5551 { 5552 Mat_MPIAIJ *b; 5553 PetscErrorCode ierr; 5554 PetscMPIInt size; 5555 5556 PetscFunctionBegin; 5557 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5558 5559 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5560 B->data = (void*)b; 5561 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5562 B->assembled = PETSC_FALSE; 5563 B->insertmode = NOT_SET_VALUES; 5564 b->size = size; 5565 5566 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5567 5568 /* build cache for off array entries formed */ 5569 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5570 5571 b->donotstash = PETSC_FALSE; 5572 b->colmap = 0; 5573 b->garray = 0; 5574 b->roworiented = PETSC_TRUE; 5575 5576 /* stuff used for matrix vector multiply */ 5577 b->lvec = NULL; 5578 b->Mvctx = NULL; 5579 5580 /* stuff for MatGetRow() */ 5581 b->rowindices = 0; 5582 b->rowvalues = 0; 5583 b->getrowactive = PETSC_FALSE; 5584 5585 /* flexible pointer used in CUSP/CUSPARSE classes */ 5586 b->spptr = NULL; 5587 5588 #if defined(PETSC_HAVE_MUMPS) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5590 #endif 5591 #if defined(PETSC_HAVE_PASTIX) 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5593 #endif 5594 #if defined(PETSC_HAVE_SUPERLU_DIST) 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5596 #endif 5597 #if defined(PETSC_HAVE_CLIQUE) 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5599 #endif 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5611 ierr = 
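
   As an illustration only (the sizes and array values below are hypothetical, written for rank 0 of a
   two-process run on a 4x4 matrix; each rank passes its own consistent CSR data, which must remain valid
   until the matrix is destroyed, as noted above), a call might look like

     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {2};
     PetscScalar oa[] = {0.5};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Here i, j, and a describe the "diagonal" block using local column indices (entries A(0,0) and A(1,1)),
   while oi, oj, and oa describe the "off-diagonal" block using global column indices (entry A(0,2)).
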
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5614 PetscFunctionReturn(0); 5615 } 5616 5617 #undef __FUNCT__ 5618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5619 /*@ 5620 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5621 and "off-diagonal" part of the matrix in CSR format. 5622 5623 Collective on MPI_Comm 5624 5625 Input Parameters: 5626 + comm - MPI communicator 5627 . m - number of local rows (Cannot be PETSC_DECIDE) 5628 . n - This value should be the same as the local size used in creating the 5629 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5630 calculated if N is given) For square matrices n is almost always m. 5631 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5632 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5633 . i - row indices for "diagonal" portion of matrix 5634 . j - column indices 5635 . a - matrix values 5636 . oi - row indices for "off-diagonal" portion of matrix 5637 . oj - column indices 5638 - oa - matrix values 5639 5640 Output Parameter: 5641 . mat - the matrix 5642 5643 Level: advanced 5644 5645 Notes: 5646 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5647 must free the arrays once the matrix has been destroyed and not before. 5648 5649 The i and j indices are 0 based 5650 5651 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5652 5653 This sets local rows and cannot be used to set off-processor values. 5654 5655 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5656 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5657 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5658 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5659 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5660 communication if it is known that only local entries will be set. 
5661 5662 .keywords: matrix, aij, compressed row, sparse, parallel 5663 5664 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5665 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5666 @*/ 5667 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5668 { 5669 PetscErrorCode ierr; 5670 Mat_MPIAIJ *maij; 5671 5672 PetscFunctionBegin; 5673 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5674 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5675 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5676 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5677 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5678 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5679 maij = (Mat_MPIAIJ*) (*mat)->data; 5680 5681 (*mat)->preallocated = PETSC_TRUE; 5682 5683 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5684 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5685 5686 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5687 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5688 5689 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 5694 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5697 PetscFunctionReturn(0); 5698 } 5699 5700 /* 5701 Special version for direct calls from Fortran 5702 */ 5703 #include <petsc-private/fortranimpl.h> 5704 5705 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5706 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5707 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5708 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5709 #endif 5710 5711 /* Change these macros so can be used in void function */ 5712 #undef CHKERRQ 5713 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5714 #undef SETERRQ2 5715 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5716 #undef SETERRQ3 5717 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5718 #undef SETERRQ 5719 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5720 5721 #undef __FUNCT__ 5722 #define __FUNCT__ "matsetvaluesmpiaij_" 5723 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5724 { 5725 Mat mat = *mmat; 5726 PetscInt m = *mm, n = *mn; 5727 InsertMode addv = *maddv; 5728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5729 PetscScalar value; 5730 PetscErrorCode ierr; 5731 5732 MatCheckPreallocated(mat,1); 5733 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5734 5735 #if defined(PETSC_USE_DEBUG) 5736 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5737 #endif 5738 { 5739 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5740 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5741 PetscBool roworiented = aij->roworiented; 5742 5743 /* Some Variables required in the macro */ 5744 Mat A = aij->A; 5745 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5746 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5747 MatScalar *aa = a->a; 5748 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5749 Mat B = aij->B; 5750 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5751 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5752 MatScalar *ba = b->a; 5753 5754 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5755 PetscInt nonew = a->nonew; 5756 MatScalar *ap1,*ap2; 5757 5758 PetscFunctionBegin; 5759 for (i=0; i<m; i++) { 5760 if (im[i] < 0) continue; 5761 #if defined(PETSC_USE_DEBUG) 5762 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5763 #endif 5764 if (im[i] >= rstart && im[i] < rend) { 5765 row = im[i] - rstart; 5766 lastcol1 = -1; 5767 rp1 = aj + ai[row]; 5768 ap1 = aa + ai[row]; 5769 rmax1 = aimax[row]; 5770 nrow1 = ailen[row]; 5771 low1 = 0; 5772 high1 = nrow1; 5773 lastcol2 = -1; 5774 rp2 = bj + bi[row]; 5775 ap2 = ba + bi[row]; 5776 rmax2 = bimax[row]; 5777 nrow2 = bilen[row]; 5778 low2 = 0; 5779 high2 = nrow2; 5780 5781 for (j=0; j<n; j++) { 5782 if (roworiented) value = v[i*n+j]; 5783 else value = v[i+j*m]; 5784 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5785 if (in[j] >= cstart && in[j] < cend) { 5786 col = in[j] - cstart; 5787 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5788 } else if (in[j] < 0) continue; 5789 #if defined(PETSC_USE_DEBUG) 5790 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5791 #endif 5792 else { 5793 if (mat->was_assembled) { 5794 if (!aij->colmap) { 5795 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5796 } 5797 #if defined(PETSC_USE_CTABLE) 5798 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5799 col--; 5800 #else 5801 col = aij->colmap[in[j]] - 1; 5802 #endif 5803 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5804 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5805 col = in[j]; 5806 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5807 B = aij->B; 5808 b = (Mat_SeqAIJ*)B->data; 5809 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5810 rp2 = bj + bi[row]; 5811 ap2 = ba + bi[row]; 5812 rmax2 = bimax[row]; 5813 nrow2 = bilen[row]; 5814 low2 = 0; 5815 high2 = nrow2; 5816 bm = aij->B->rmap->n; 5817 ba = b->a; 5818 } 5819 } else col = in[j]; 5820 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5821 } 5822 } 5823 } else if (!aij->donotstash) { 5824 if (roworiented) { 5825 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5826 } else { 5827 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5828 } 5829 } 5830 } 5831 } 5832 PetscFunctionReturnVoid(); 5833 } 5834 5835
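
/*
   For reference, a minimal illustrative sketch (not part of the library; M, N, col, and value are
   placeholders the caller must supply) of the MatSetValues()-based assembly of a MATMPIAIJ matrix
   that the manual pages above recommend in preference to the split-array constructor:

     Mat         A;
     PetscInt    Istart,Iend,row,col;
     PetscScalar value;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
     for (row=Istart; row<Iend; row++) {
       ... compute col and value for each nonzero of this local row, then ...
       ierr = MatSetValues(A,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   The arguments 5 and 2 to MatMPIAIJSetPreallocation() are rough per-row nonzero estimates for the
   diagonal and off-diagonal blocks and should be replaced by problem-specific values.
*/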