1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = 
aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. 
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) { 1318 rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } 1320 } else { 1321 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1322 rlen = mat->rmap->n; 1323 } 1324 1325 /* load up the local row counts */ 1326 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1327 for (i=0; i<mat->rmap->n; i++) { 1328 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1329 } 1330 1331 /* store the row lengths to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1337 rlen = range[i+1] - range[i]; 1338 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1340 } 1341 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1344 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1348 1349 /* load up the local column indices */ 1350 nzmax = nz; /* )th processor needs space a largest processor needs */ 1351 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if ( (col = garray[B->j[j]]) > cstart) break; 1357 column_indices[cnt++] = col; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) { 1360 column_indices[cnt++] = A->j[k] + cstart; 1361 } 1362 for (; j<B->i[i+1]; j++) { 1363 column_indices[cnt++] = garray[B->j[j]]; 1364 } 1365 } 1366 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1367 1368 /* store the column indices to the file */ 1369 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1370 if (!rank) { 1371 MPI_Status status; 1372 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 for (i=1; i<size; i++) { 1374 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 default: 1830 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatGetRow_MPIAIJ" 1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1838 { 1839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1840 
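/*
   MatGetRow_MPIAIJ() below returns one global row by merging the corresponding
   row of the diagonal block A (local column indices, shifted by cstart) with the
   row of the off-diagonal block B (local column indices translated through
   garray).  Both pieces are already sorted by column, so the merge only needs to
   find where the B columns cross cstart.  Illustrative example (not taken from
   the source): with cstart = 4, an A row with local columns {0,2} and a B row
   with global columns {1,9} produce the merged row {1,4,6,9}.
*/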
PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1841 PetscErrorCode ierr; 1842 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1843 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1844 PetscInt *cmap,*idx_p; 1845 1846 PetscFunctionBegin; 1847 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1848 mat->getrowactive = PETSC_TRUE; 1849 1850 if (!mat->rowvalues && (idx || v)) { 1851 /* 1852 allocate enough space to hold information from the longest row. 1853 */ 1854 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1855 PetscInt max = 1,tmp; 1856 for (i=0; i<matin->rmap->n; i++) { 1857 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1858 if (max < tmp) { max = tmp; } 1859 } 1860 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1861 } 1862 1863 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1864 lrow = row - rstart; 1865 1866 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1867 if (!v) {pvA = 0; pvB = 0;} 1868 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1869 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 nztot = nzA + nzB; 1872 1873 cmap = mat->garray; 1874 if (v || idx) { 1875 if (nztot) { 1876 /* Sort by increasing column numbers, assuming A and B already sorted */ 1877 PetscInt imark = -1; 1878 if (v) { 1879 *v = v_p = mat->rowvalues; 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1882 else break; 1883 } 1884 imark = i; 1885 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1886 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1887 } 1888 if (idx) { 1889 *idx = idx_p = mat->rowindices; 1890 if (imark > -1) { 1891 for (i=0; i<imark; i++) { 1892 idx_p[i] = cmap[cworkB[i]]; 1893 } 1894 } else { 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1897 else break; 1898 } 1899 imark = i; 1900 } 1901 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1902 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 if (idx) *idx = 0; 1906 if (v) *v = 0; 1907 } 1908 } 1909 *nz = nztot; 1910 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1917 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1918 { 1919 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1920 1921 PetscFunctionBegin; 1922 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1923 aij->getrowactive = PETSC_FALSE; 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatNorm_MPIAIJ" 1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1933 PetscErrorCode ierr; 1934 PetscInt i,j,cstart = mat->cmap->rstart; 1935 PetscReal sum = 0.0; 1936 MatScalar *v; 1937 1938 PetscFunctionBegin; 1939 if (aij->size == 1) { 1940 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1941 } else { 1942 if (type == 
NORM_FROBENIUS) { 1943 v = amat->a; 1944 for (i=0; i<amat->nz; i++) { 1945 #if defined(PETSC_USE_COMPLEX) 1946 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1947 #else 1948 sum += (*v)*(*v); v++; 1949 #endif 1950 } 1951 v = bmat->a; 1952 for (i=0; i<bmat->nz; i++) { 1953 #if defined(PETSC_USE_COMPLEX) 1954 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1955 #else 1956 sum += (*v)*(*v); v++; 1957 #endif 1958 } 1959 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1960 *norm = PetscSqrtReal(*norm); 1961 } else if (type == NORM_1) { /* max column norm */ 1962 PetscReal *tmp,*tmp2; 1963 PetscInt *jj,*garray = aij->garray; 1964 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1965 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1966 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1967 *norm = 0.0; 1968 v = amat->a; jj = amat->j; 1969 for (j=0; j<amat->nz; j++) { 1970 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1971 } 1972 v = bmat->a; jj = bmat->j; 1973 for (j=0; j<bmat->nz; j++) { 1974 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1975 } 1976 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1977 for (j=0; j<mat->cmap->N; j++) { 1978 if (tmp2[j] > *norm) *norm = tmp2[j]; 1979 } 1980 ierr = PetscFree(tmp);CHKERRQ(ierr); 1981 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1982 } else if (type == NORM_INFINITY) { /* max row norm */ 1983 PetscReal ntemp = 0.0; 1984 for (j=0; j<aij->A->rmap->n; j++) { 1985 v = amat->a + amat->i[j]; 1986 sum = 0.0; 1987 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 v = bmat->a + bmat->i[j]; 1991 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 if (sum > ntemp) ntemp = sum; 1995 } 1996 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 1997 } else { 1998 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 1999 } 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatTranspose_MPIAIJ" 2006 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2007 { 2008 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2009 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2010 PetscErrorCode ierr; 2011 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2012 PetscInt cstart=A->cmap->rstart,ncol; 2013 Mat B; 2014 MatScalar *array; 2015 2016 PetscFunctionBegin; 2017 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2018 2019 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2020 ai = Aloc->i; aj = Aloc->j; 2021 bi = Bloc->i; bj = Bloc->j; 2022 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2023 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2024 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2025 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2026 for (i=0; i<ai[ma]; i++){ 2027 d_nnz[aj[i]] ++; 2028 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2029 } 2030 2031 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2032 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2033 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); 
CHKERRQ(ierr); 2034 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2035 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2036 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2037 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2038 } else { 2039 B = *matout; 2040 } 2041 2042 /* copy over the A part */ 2043 array = Aloc->a; 2044 row = A->rmap->rstart; 2045 for (i=0; i<ma; i++) { 2046 ncol = ai[i+1]-ai[i]; 2047 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2048 row++; array += ncol; aj += ncol; 2049 } 2050 aj = Aloc->j; 2051 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2052 2053 /* copy over the B part */ 2054 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2055 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2056 array = Bloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2059 cols_tmp = cols; 2060 for (i=0; i<mb; i++) { 2061 ncol = bi[i+1]-bi[i]; 2062 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; array += ncol; cols_tmp += ncol; 2064 } 2065 ierr = PetscFree(cols);CHKERRQ(ierr); 2066 2067 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2070 *matout = B; 2071 } else { 2072 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2079 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2080 { 2081 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2082 Mat a = aij->A,b = aij->B; 2083 PetscErrorCode ierr; 2084 PetscInt s1,s2,s3; 2085 2086 PetscFunctionBegin; 2087 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2088 if (rr) { 2089 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2090 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2091 /* Overlap communication with computation. 
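The scatter of rr into aij->lvec is started here; the left scaling of the off-diagonal block and the scaling of the diagonal block proceed while the messages are in flight, and the scatter is completed only when the ghosted values of rr are actually needed to right-scale the off-diagonal block.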
*/ 2092 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2093 } 2094 if (ll) { 2095 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2096 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2097 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2098 } 2099 /* scale the diagonal block */ 2100 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2101 2102 if (rr) { 2103 /* Do a scatter end and then right scale the off-diagonal block */ 2104 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2105 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2106 } 2107 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2114 { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatEqual_MPIAIJ" 2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2126 { 2127 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2128 Mat a,b,c,d; 2129 PetscBool flg; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 a = matA->A; b = matA->B; 2134 c = matB->A; d = matB->B; 2135 2136 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2137 if (flg) { 2138 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2139 } 2140 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatCopy_MPIAIJ" 2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2147 { 2148 PetscErrorCode ierr; 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2151 2152 PetscFunctionBegin; 2153 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2154 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2155 /* because of the column compression in the off-processor part of the matrix a->B, 2156 the number of columns in a->B and b->B may be different, hence we cannot call 2157 the MatCopy() directly on the two parts. 
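(Column compression here means that the off-processor part stores only the global columns in which it actually has nonzeros, renumbered 0..n_B-1 with the global numbering recorded in garray, so its local column count depends on the nonzero pattern rather than on the global column size.)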
If need be, we can provide a more 2158 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2159 then copying the submatrices */ 2160 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2161 } else { 2162 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2163 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 #undef __FUNCT__ 2169 #define __FUNCT__ "MatSetUp_MPIAIJ" 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2182 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2183 { 2184 PetscInt i,m=Y->rmap->N; 2185 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2186 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2187 const PetscInt *xi = x->i,*yi = y->i; 2188 2189 PetscFunctionBegin; 2190 /* Set the number of nonzeros in the new matrix */ 2191 for(i=0; i<m; i++) { 2192 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2193 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2194 nnz[i] = 0; 2195 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2196 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2197 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2198 nnz[i]++; 2199 } 2200 for (; k<nzy; k++) nnz[i]++; 2201 } 2202 PetscFunctionReturn(0); 2203 } 2204 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatAXPY_MPIAIJ" 2207 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2208 { 2209 PetscErrorCode ierr; 2210 PetscInt i; 2211 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2212 PetscBLASInt bnz,one=1; 2213 Mat_SeqAIJ *x,*y; 2214 2215 PetscFunctionBegin; 2216 if (str == SAME_NONZERO_PATTERN) { 2217 PetscScalar alpha = a; 2218 x = (Mat_SeqAIJ *)xx->A->data; 2219 y = (Mat_SeqAIJ *)yy->A->data; 2220 bnz = PetscBLASIntCast(x->nz); 2221 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2222 x = (Mat_SeqAIJ *)xx->B->data; 2223 y = (Mat_SeqAIJ *)yy->B->data; 2224 bnz = PetscBLASIntCast(x->nz); 2225 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2226 } else if (str == SUBSET_NONZERO_PATTERN) { 2227 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2228 2229 x = (Mat_SeqAIJ *)xx->B->data; 2230 y = (Mat_SeqAIJ *)yy->B->data; 2231 if (y->xtoy && y->XtoY != xx->B) { 2232 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2233 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2234 } 2235 if (!y->xtoy) { /* get xtoy */ 2236 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2237 y->XtoY = xx->B; 2238 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2239 } 2240 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,B); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatConjugate_MPIAIJ" 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatRealPart_MPIAIJ" 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2297 { 2298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2299 PetscErrorCode ierr; 2300 2301 PetscFunctionBegin; 2302 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2303 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 #ifdef PETSC_HAVE_PBGL 2308 2309 #include <boost/parallel/mpi/bsp_process_group.hpp> 2310 #include <boost/graph/distributed/ilu_default_graph.hpp> 2311 #include <boost/graph/distributed/ilu_0_block.hpp> 2312 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2313 #include <boost/graph/distributed/petsc/interface.hpp> 2314 #include <boost/multi_array.hpp> 2315 #include <boost/parallel/distributed_property_map->hpp> 2316 2317 #undef __FUNCT__ 2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2319 /* 2320 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2321 */ 2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2323 { 2324 namespace petsc = boost::distributed::petsc; 2325 2326 namespace graph_dist = boost::graph::distributed; 2327 using boost::graph::distributed::ilu_default::process_group_type; 2328 using boost::graph::ilu_permuted; 2329 2330 PetscBool row_identity, col_identity; 2331 PetscContainer c; 2332 PetscInt m, n, M, N; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2337 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2338 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2339 if (!row_identity || !col_identity) { 2340 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2341 } 2342 2343 process_group_type pg; 2344 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2345 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2346 lgraph_type& level_graph = *lgraph_p; 2347 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2348 2349 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2350 ilu_permuted(level_graph); 2351 2352 /* put together the new matrix */ 2353 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2354 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2355 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2356 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2357 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2358 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2359 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2360 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2361 2362 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2363 ierr = PetscContainerSetPointer(c, lgraph_p); 2364 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2365 ierr = PetscContainerDestroy(&c); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2371 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2372 { 2373 PetscFunctionBegin; 2374 PetscFunctionReturn(0); 2375 } 2376 2377 #undef __FUNCT__ 2378 #define __FUNCT__ "MatSolve_MPIAIJ" 2379 /* 2380 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2381 */ 2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2383 { 2384 namespace graph_dist = boost::graph::distributed; 2385 2386 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2387 lgraph_type* lgraph_p; 2388 PetscContainer c; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2393 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2394 ierr = VecCopy(b, x);CHKERRQ(ierr); 2395 2396 PetscScalar* array_x; 2397 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2398 PetscInt sx; 2399 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2400 2401 PetscScalar* array_b; 2402 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2403 PetscInt sb; 2404 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2405 2406 lgraph_type& level_graph = *lgraph_p; 2407 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2408 2409 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2410 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2411 ref_x(array_x, boost::extents[num_vertices(graph)]); 2412 2413 typedef boost::iterator_property_map<array_ref_type::iterator, 2414 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2415 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2416 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2417 2418 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2419 2420 PetscFunctionReturn(0); 2421 } 2422 #endif 2423 2424 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2425 PetscInt nzlocal,nsends,nrecvs; 2426 PetscMPIInt *send_rank,*recv_rank; 2427 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2428 PetscScalar *sbuf_a,**rbuf_a; 2429 PetscErrorCode (*Destroy)(Mat); 2430 } Mat_Redundant; 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2438 PetscInt i; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2442 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2443 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2444 for (i=0; i<redund->nrecvs; i++){ 2445 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2446 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2447 } 2448 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2449 ierr = PetscFree(redund);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatDestroy_MatRedundant" 2455 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2456 { 2457 PetscErrorCode ierr; 2458 PetscContainer container; 2459 Mat_Redundant *redund=PETSC_NULL; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2463 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2464 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2465 A->ops->destroy = redund->Destroy; 2466 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2467 if (A->ops->destroy) { 2468 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2476 { 2477 PetscMPIInt rank,size; 2478 MPI_Comm comm=((PetscObject)mat)->comm; 2479 PetscErrorCode ierr; 2480 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2481 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2482 PetscInt *rowrange=mat->rmap->range; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2484 Mat A=aij->A,B=aij->B,C=*matredundant; 2485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2486 PetscScalar *sbuf_a; 2487 PetscInt nzlocal=a->nz+b->nz; 2488 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2489 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2490 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2491 MatScalar *aworkA,*aworkB; 2492 PetscScalar *vals; 2493 PetscMPIInt tag1,tag2,tag3,imdex; 2494 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2495 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2496 MPI_Status recv_status,*send_status; 2497 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2498 PetscInt **rbuf_j=PETSC_NULL; 2499 PetscScalar **rbuf_a=PETSC_NULL; 2500 Mat_Redundant *redund=PETSC_NULL; 2501 PetscContainer container; 2502 2503 PetscFunctionBegin; 2504 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2506 2507 if (reuse == MAT_REUSE_MATRIX) { 2508 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2509 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2510 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2511 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2512 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2513 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2514 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2515 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2516 2517 nsends = redund->nsends; 2518 nrecvs = redund->nrecvs; 2519 send_rank = redund->send_rank; 2520 recv_rank = redund->recv_rank; 2521 sbuf_nz = redund->sbuf_nz; 2522 rbuf_nz = redund->rbuf_nz; 2523 sbuf_j = redund->sbuf_j; 2524 sbuf_a = redund->sbuf_a; 2525 rbuf_j = redund->rbuf_j; 2526 rbuf_a = redund->rbuf_a; 2527 } 2528 2529 if (reuse == MAT_INITIAL_MATRIX){ 2530 PetscMPIInt subrank,subsize; 2531 PetscInt nleftover,np_subcomm; 2532 /* get the destination processors' id send_rank, nsends and nrecvs */ 2533 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2534 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2535 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2536 np_subcomm = size/nsubcomm; 2537 nleftover = size - nsubcomm*np_subcomm; 2538 nsends = 0; nrecvs = 0; 2539 for (i=0; i<size; i++){ /* i=rank*/ 2540 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2541 send_rank[nsends] = i; nsends++; 2542 recv_rank[nrecvs++] = i; 2543 } 2544 } 2545 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2546 i = size-nleftover-1; 2547 j = 0; 2548 while (j < nsubcomm - nleftover){ 2549 send_rank[nsends++] = i; 2550 i--; j++; 2551 } 2552 } 2553 2554 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2555 for (i=0; i<nleftover; i++){ 2556 recv_rank[nrecvs++] = size-nleftover+i; 2557 } 2558 } 2559 2560 /* allocate sbuf_j, sbuf_a */ 2561 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2562 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2563 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2564 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2565 2566 /* copy mat's local entries into the buffers */ 2567 if (reuse == MAT_INITIAL_MATRIX){ 2568 rownz_max = 0; 2569 rptr = sbuf_j; 2570 cols = sbuf_j + rend-rstart + 1; 2571 vals = sbuf_a; 2572 rptr[0] = 0; 2573 for (i=0; i<rend-rstart; i++){ 2574 row = i + rstart; 2575 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2576 ncols = nzA + nzB; 2577 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2578 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2579 /* load the column indices for this row into cols */ 2580 lwrite = 0; 2581 for (l=0; l<nzB; l++) { 2582 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2583 vals[lwrite] = aworkB[l]; 2584 cols[lwrite++] = ctmp; 2585 } 2586 } 2587 for (l=0; l<nzA; l++){ 2588 vals[lwrite] = aworkA[l]; 2589 cols[lwrite++] = cstart + cworkA[l]; 2590 } 2591 for (l=0; l<nzB; l++) { 2592 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2593 vals[lwrite] = aworkB[l]; 2594 cols[lwrite++] = ctmp; 2595 } 2596 } 2597 vals += ncols; 2598 cols += ncols; 2599 rptr[i+1] = rptr[i] + ncols; 2600 if (rownz_max < ncols) rownz_max = ncols; 2601 } 2602 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2603 } else { /* only copy matrix values into sbuf_a */ 2604 rptr = sbuf_j; 2605 vals = sbuf_a; 2606 rptr[0] = 0; 2607 for (i=0; i<rend-rstart; i++){ 2608 row = i + 
rstart; 2609 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2610 ncols = nzA + nzB; 2611 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2612 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2613 lwrite = 0; 2614 for (l=0; l<nzB; l++) { 2615 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2616 } 2617 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2618 for (l=0; l<nzB; l++) { 2619 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2620 } 2621 vals += ncols; 2622 rptr[i+1] = rptr[i] + ncols; 2623 } 2624 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2625 2626 /* send nzlocal to others, and recv other's nzlocal */ 2627 /*--------------------------------------------------*/ 2628 if (reuse == MAT_INITIAL_MATRIX){ 2629 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2630 s_waits2 = s_waits3 + nsends; 2631 s_waits1 = s_waits2 + nsends; 2632 r_waits1 = s_waits1 + nsends; 2633 r_waits2 = r_waits1 + nrecvs; 2634 r_waits3 = r_waits2 + nrecvs; 2635 } else { 2636 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2637 r_waits3 = s_waits3 + nsends; 2638 } 2639 2640 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2641 if (reuse == MAT_INITIAL_MATRIX){ 2642 /* get new tags to keep the communication clean */ 2643 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2644 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2645 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2646 2647 /* post receives of other's nzlocal */ 2648 for (i=0; i<nrecvs; i++){ 2649 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2650 } 2651 /* send nzlocal to others */ 2652 for (i=0; i<nsends; i++){ 2653 sbuf_nz[i] = nzlocal; 2654 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2655 } 2656 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2657 count = nrecvs; 2658 while (count) { 2659 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2660 recv_rank[imdex] = recv_status.MPI_SOURCE; 2661 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2662 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2663 2664 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2665 rbuf_nz[imdex] += i + 2; 2666 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2667 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2668 count--; 2669 } 2670 /* wait on sends of nzlocal */ 2671 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2672 /* send mat->i,j to others, and recv from other's */ 2673 /*------------------------------------------------*/ 2674 for (i=0; i<nsends; i++){ 2675 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2676 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2677 } 2678 /* wait on receives of mat->i,j */ 2679 /*------------------------------*/ 2680 count = nrecvs; 2681 while (count) { 2682 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2683 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2684 count--; 2685 } 2686 /* wait on sends of mat->i,j */ 2687 /*---------------------------*/ 2688 if (nsends) { 2689 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2690 } 2691 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2692 2693 /* post receives, send and receive mat->a */ 2694 /*----------------------------------------*/ 2695 for (imdex=0; imdex<nrecvs; imdex++) { 2696 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2697 } 2698 for (i=0; i<nsends; i++){ 2699 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2700 } 2701 count = nrecvs; 2702 while (count) { 2703 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2704 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2705 count--; 2706 } 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2709 } 2710 2711 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2712 2713 /* create redundant matrix */ 2714 /*-------------------------*/ 2715 if (reuse == MAT_INITIAL_MATRIX){ 2716 /* compute rownz_max for preallocation */ 2717 for (imdex=0; imdex<nrecvs; imdex++){ 2718 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2719 rptr = rbuf_j[imdex]; 2720 for (i=0; i<j; i++){ 2721 ncols = rptr[i+1] - rptr[i]; 2722 if (rownz_max < ncols) rownz_max = ncols; 2723 } 2724 } 2725 2726 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2727 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2728 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2729 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2730 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2731 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2732 } else { 2733 C = *matredundant; 2734 } 2735 2736 /* insert local matrix entries */ 2737 rptr = sbuf_j; 2738 cols = sbuf_j + rend-rstart + 1; 2739 vals = sbuf_a; 2740 for (i=0; i<rend-rstart; i++){ 2741 row = i + rstart; 2742 ncols = rptr[i+1] - rptr[i]; 2743 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2744 vals += ncols; 2745 cols += ncols; 2746 } 2747 /* insert received matrix entries */ 2748 for (imdex=0; imdex<nrecvs; imdex++){ 2749 rstart = rowrange[recv_rank[imdex]]; 2750 rend = rowrange[recv_rank[imdex]+1]; 2751 rptr = rbuf_j[imdex]; 2752 cols = rbuf_j[imdex] + rend-rstart + 1; 2753 vals = rbuf_a[imdex]; 2754 for (i=0; i<rend-rstart; i++){ 2755 row = i + rstart; 2756 ncols = rptr[i+1] - rptr[i]; 2757 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2758 vals += ncols; 2759 cols += ncols; 2760 } 2761 } 2762 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2763 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2764 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2765 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2766 if (reuse == MAT_INITIAL_MATRIX) { 2767 PetscContainer container; 2768 *matredundant = C; 2769 /* create a supporting struct and attach it to C for reuse */ 2770 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2771 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2772 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2773 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2774 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2775 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2776 2777 redund->nzlocal = nzlocal; 2778 redund->nsends = nsends; 2779 redund->nrecvs = nrecvs; 2780 redund->send_rank = send_rank; 2781 redund->recv_rank = recv_rank; 2782 redund->sbuf_nz = sbuf_nz; 2783 redund->rbuf_nz = rbuf_nz; 2784 redund->sbuf_j = sbuf_j; 2785 redund->sbuf_a = sbuf_a; 2786 redund->rbuf_j = rbuf_j; 2787 redund->rbuf_a = rbuf_a; 2788 2789 redund->Destroy = C->ops->destroy; 2790 C->ops->destroy = MatDestroy_MatRedundant; 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 2795 #undef __FUNCT__ 2796 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2797 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2798 { 2799 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2800 PetscErrorCode ierr; 2801 PetscInt i,*idxb = 0; 2802 PetscScalar *va,*vb; 2803 Vec vtmp; 2804 2805 PetscFunctionBegin; 2806 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2807 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2808 if (idx) { 2809 for (i=0; i<A->rmap->n; i++) { 2810 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2811 } 2812 } 2813 2814 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2815 if (idx) { 2816 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2817 } 2818 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2819 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2820 2821 for (i=0; i<A->rmap->n; i++){ 2822 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2823 va[i] = vb[i]; 2824 if (idx) idx[i] = a->garray[idxb[i]]; 2825 } 2826 } 2827 2828 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2829 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2830 ierr = PetscFree(idxb);CHKERRQ(ierr); 2831 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2837 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2838 { 2839 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2840 PetscErrorCode ierr; 2841 PetscInt i,*idxb = 0; 2842 PetscScalar *va,*vb; 2843 Vec vtmp; 2844 2845 PetscFunctionBegin; 2846 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2847 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2848 if (idx) { 2849 for (i=0; i<A->rmap->n; i++) { 2850 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2851 } 2852 } 2853 2854 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2855 if (idx) { 2856 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2857 } 2858 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2859 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2860 2861 for (i=0; i<A->rmap->n; i++){ 2862 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2863 va[i] = vb[i]; 2864 if (idx) idx[i] = a->garray[idxb[i]]; 2865 } 2866 } 2867 2868 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2870 ierr = PetscFree(idxb);CHKERRQ(ierr); 2871 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2872 PetscFunctionReturn(0); 2873 } 2874 2875 #undef __FUNCT__ 2876 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2877 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2878 { 2879 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2880 PetscInt n = A->rmap->n; 2881 PetscInt cstart =
A->cmap->rstart; 2882 PetscInt *cmap = mat->garray; 2883 PetscInt *diagIdx, *offdiagIdx; 2884 Vec diagV, offdiagV; 2885 PetscScalar *a, *diagA, *offdiagA; 2886 PetscInt r; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2891 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2892 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2893 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2894 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2895 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2896 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2897 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2898 for(r = 0; r < n; ++r) { 2899 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2900 a[r] = diagA[r]; 2901 idx[r] = cstart + diagIdx[r]; 2902 } else { 2903 a[r] = offdiagA[r]; 2904 idx[r] = cmap[offdiagIdx[r]]; 2905 } 2906 } 2907 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2908 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2909 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2910 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2911 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2912 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2913 PetscFunctionReturn(0); 2914 } 2915 2916 #undef __FUNCT__ 2917 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2918 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2919 { 2920 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2921 PetscInt n = A->rmap->n; 2922 PetscInt cstart = A->cmap->rstart; 2923 PetscInt *cmap = mat->garray; 2924 PetscInt *diagIdx, *offdiagIdx; 2925 Vec diagV, offdiagV; 2926 PetscScalar *a, *diagA, *offdiagA; 2927 PetscInt r; 2928 PetscErrorCode ierr; 2929 2930 PetscFunctionBegin; 2931 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2932 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2933 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2934 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2935 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2937 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2938 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2939 for(r = 0; r < n; ++r) { 2940 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2941 a[r] = diagA[r]; 2942 idx[r] = cstart + diagIdx[r]; 2943 } else { 2944 a[r] = offdiagA[r]; 2945 idx[r] = cmap[offdiagIdx[r]]; 2946 } 2947 } 2948 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2950 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2951 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2952 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2953 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 #undef __FUNCT__ 2958 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2959 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2960 { 2961 PetscErrorCode ierr; 2962 Mat *dummy; 2963 2964 PetscFunctionBegin; 2965 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2966 *newmat = *dummy; 2967 ierr = PetscFree(dummy);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2972 2973 #undef __FUNCT__ 
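/* Block-diagonal inversion (used, e.g., by point-block Jacobi) only involves the
   diagonal blocks of the matrix, and for MPIAIJ these lie entirely within the
   local diagonal part a->A, so the parallel version simply forwards to the
   sequential routine. */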
2974 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2975 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2976 { 2977 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2978 PetscErrorCode ierr; 2979 2980 PetscFunctionBegin; 2981 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 2986 /* -------------------------------------------------------------------*/ 2987 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2988 MatGetRow_MPIAIJ, 2989 MatRestoreRow_MPIAIJ, 2990 MatMult_MPIAIJ, 2991 /* 4*/ MatMultAdd_MPIAIJ, 2992 MatMultTranspose_MPIAIJ, 2993 MatMultTransposeAdd_MPIAIJ, 2994 #ifdef PETSC_HAVE_PBGL 2995 MatSolve_MPIAIJ, 2996 #else 2997 0, 2998 #endif 2999 0, 3000 0, 3001 /*10*/ 0, 3002 0, 3003 0, 3004 MatSOR_MPIAIJ, 3005 MatTranspose_MPIAIJ, 3006 /*15*/ MatGetInfo_MPIAIJ, 3007 MatEqual_MPIAIJ, 3008 MatGetDiagonal_MPIAIJ, 3009 MatDiagonalScale_MPIAIJ, 3010 MatNorm_MPIAIJ, 3011 /*20*/ MatAssemblyBegin_MPIAIJ, 3012 MatAssemblyEnd_MPIAIJ, 3013 MatSetOption_MPIAIJ, 3014 MatZeroEntries_MPIAIJ, 3015 /*24*/ MatZeroRows_MPIAIJ, 3016 0, 3017 #ifdef PETSC_HAVE_PBGL 3018 0, 3019 #else 3020 0, 3021 #endif 3022 0, 3023 0, 3024 /*29*/ MatSetUp_MPIAIJ, 3025 #ifdef PETSC_HAVE_PBGL 3026 0, 3027 #else 3028 0, 3029 #endif 3030 0, 3031 0, 3032 0, 3033 /*34*/ MatDuplicate_MPIAIJ, 3034 0, 3035 0, 3036 0, 3037 0, 3038 /*39*/ MatAXPY_MPIAIJ, 3039 MatGetSubMatrices_MPIAIJ, 3040 MatIncreaseOverlap_MPIAIJ, 3041 MatGetValues_MPIAIJ, 3042 MatCopy_MPIAIJ, 3043 /*44*/ MatGetRowMax_MPIAIJ, 3044 MatScale_MPIAIJ, 3045 0, 3046 0, 3047 MatZeroRowsColumns_MPIAIJ, 3048 /*49*/ 0, 3049 0, 3050 0, 3051 0, 3052 0, 3053 /*54*/ MatFDColoringCreate_MPIAIJ, 3054 0, 3055 MatSetUnfactored_MPIAIJ, 3056 MatPermute_MPIAIJ, 3057 0, 3058 /*59*/ MatGetSubMatrix_MPIAIJ, 3059 MatDestroy_MPIAIJ, 3060 MatView_MPIAIJ, 3061 0, 3062 0, 3063 /*64*/ 0, 3064 0, 3065 0, 3066 0, 3067 0, 3068 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3069 MatGetRowMinAbs_MPIAIJ, 3070 0, 3071 MatSetColoring_MPIAIJ, 3072 #if defined(PETSC_HAVE_ADIC) 3073 MatSetValuesAdic_MPIAIJ, 3074 #else 3075 0, 3076 #endif 3077 MatSetValuesAdifor_MPIAIJ, 3078 /*75*/ MatFDColoringApply_AIJ, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*80*/ 0, 3084 0, 3085 0, 3086 /*83*/ MatLoad_MPIAIJ, 3087 0, 3088 0, 3089 0, 3090 0, 3091 0, 3092 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3093 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3094 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3095 MatPtAP_Basic, 3096 MatPtAPSymbolic_MPIAIJ, 3097 /*94*/ MatPtAPNumeric_MPIAIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*99*/ 0, 3103 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3104 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3105 MatConjugate_MPIAIJ, 3106 0, 3107 /*104*/MatSetValuesRow_MPIAIJ, 3108 MatRealPart_MPIAIJ, 3109 MatImaginaryPart_MPIAIJ, 3110 0, 3111 0, 3112 /*109*/0, 3113 MatGetRedundantMatrix_MPIAIJ, 3114 MatGetRowMin_MPIAIJ, 3115 0, 3116 0, 3117 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 /*119*/0, 3123 0, 3124 0, 3125 0, 3126 MatGetMultiProcBlock_MPIAIJ, 3127 /*124*/MatFindNonZeroRows_MPIAIJ, 3128 MatGetColumnNorms_MPIAIJ, 3129 MatInvertBlockDiagonal_MPIAIJ, 3130 0, 3131 MatGetSubMatricesParallel_MPIAIJ, 3132 /*129*/0, 3133 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3134 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3135 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3136 0, 3137 /*134*/0, 3138 0, 3139 0, 3140 0, 3141 0 3142 }; 3143 3144 /* ----------------------------------------------------------------------------------------*/ 3145 3146 EXTERN_C_BEGIN 3147 #undef __FUNCT__ 3148 
#define __FUNCT__ "MatStoreValues_MPIAIJ" 3149 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3150 { 3151 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3152 PetscErrorCode ierr; 3153 3154 PetscFunctionBegin; 3155 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3156 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3157 PetscFunctionReturn(0); 3158 } 3159 EXTERN_C_END 3160 3161 EXTERN_C_BEGIN 3162 #undef __FUNCT__ 3163 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3164 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3165 { 3166 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3171 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 EXTERN_C_END 3175 3176 EXTERN_C_BEGIN 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3179 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3180 { 3181 Mat_MPIAIJ *b; 3182 PetscErrorCode ierr; 3183 PetscInt i; 3184 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3185 3186 PetscFunctionBegin; 3187 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3188 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3189 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3190 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3191 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3192 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3193 3194 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3195 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3196 if (d_nnz) { 3197 for (i=0; i<B->rmap->n; i++) { 3198 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3199 } 3200 } 3201 if (o_nnz) { 3202 for (i=0; i<B->rmap->n; i++) { 3203 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3204 } 3205 } 3206 b = (Mat_MPIAIJ*)B->data; 3207 3208 if (!B->preallocated) { 3209 /* Explicitly create 2 MATSEQAIJ matrices. */ 3210 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3211 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3212 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3213 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3214 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3215 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3216 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3217 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3218 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3219 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3220 } 3221 3222 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3223 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3224 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3225 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3226 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3227 B->preallocated = PETSC_TRUE; 3228 PetscFunctionReturn(0); 3229 } 3230 EXTERN_C_END 3231 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3234 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3235 { 3236 Mat mat; 3237 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 *newmat = 0; 3242 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3243 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3244 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3245 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3246 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3247 a = (Mat_MPIAIJ*)mat->data; 3248 3249 mat->factortype = matin->factortype; 3250 mat->rmap->bs = matin->rmap->bs; 3251 mat->cmap->bs = matin->cmap->bs; 3252 mat->assembled = PETSC_TRUE; 3253 mat->insertmode = NOT_SET_VALUES; 3254 mat->preallocated = PETSC_TRUE; 3255 3256 a->size = oldmat->size; 3257 a->rank = oldmat->rank; 3258 a->donotstash = oldmat->donotstash; 3259 a->roworiented = oldmat->roworiented; 3260 a->rowindices = 0; 3261 a->rowvalues = 0; 3262 a->getrowactive = PETSC_FALSE; 3263 3264 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3265 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3266 3267 if (oldmat->colmap) { 3268 #if defined (PETSC_USE_CTABLE) 3269 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3270 #else 3271 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3272 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3273 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3274 #endif 3275 } else a->colmap = 0; 3276 if (oldmat->garray) { 3277 PetscInt len; 3278 len = oldmat->B->cmap->n; 3279 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3280 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3281 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3282 } else a->garray = 0; 3283 3284 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3285 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3286 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3287 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3288 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3289 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3290 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3291 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3292 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3293 *newmat = mat; 3294 PetscFunctionReturn(0); 3295 } 3296 3297 3298 3299 #undef __FUNCT__ 3300 #define __FUNCT__ "MatLoad_MPIAIJ" 3301 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3302 { 3303 PetscScalar *vals,*svals; 3304 MPI_Comm comm = ((PetscObject)viewer)->comm; 3305 PetscErrorCode ierr; 3306 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3307 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3308 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3309 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3310 PetscInt cend,cstart,n,*rowners,sizesset=1; 3311 int fd; 3312 3313 PetscFunctionBegin; 3314 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3315 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3316 if (!rank) { 3317 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3318 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3319 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3320 } 3321 3322 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3323 3324 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3325 M = header[1]; N = header[2]; 3326 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3327 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3328 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3329 3330 /* If global sizes are set, check if they are consistent with that given in the file */ 3331 if (sizesset) { 3332 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3333 } 3334 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3335 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3336 3337 /* determine ownership of all rows */ 3338 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3339 else m = newMat->rmap->n; /* Set by user */ 3340 3341 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3342 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3343 3344 /* First process needs enough room for process with most rows */ 3345 if (!rank) { 3346 mmax = rowners[1]; 3347 for (i=2; i<size; i++) { 3348 mmax = PetscMax(mmax,rowners[i]); 3349 } 3350 } else mmax = m; 3351 3352 rowners[0] = 0; 3353 for (i=2; i<=size; i++) { 3354 rowners[i] += rowners[i-1]; 3355 } 3356 rstart = rowners[rank]; 3357 rend = rowners[rank+1]; 3358 3359 /* distribute row lengths to all processors */ 3360 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3361 if (!rank) { 3362 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3363 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3364 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3365 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3366 for (j=0; j<m; j++) { 3367 procsnz[0] += ourlens[j]; 3368 } 3369 for (i=1; i<size; i++) { 3370 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3371 /* calculate the number of nonzeros on each processor */ 3372 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3373 procsnz[i] += rowlengths[j]; 3374 } 3375 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3376 } 3377 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3378 } else { 3379 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3380 } 3381 3382 if (!rank) { 3383 /* determine max buffer needed and allocate it */ 3384 maxnz = 0; 3385 for (i=0; i<size; i++) { 3386 maxnz = PetscMax(maxnz,procsnz[i]); 3387 } 3388 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3389 3390 /* read in my part of the matrix column indices */ 3391 nz = procsnz[0]; 3392 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3393 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3394 3395 /* read in every one elses and ship off */ 3396 for (i=1; i<size; i++) { 3397 nz = procsnz[i]; 3398 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3399 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3400 } 3401 ierr = PetscFree(cols);CHKERRQ(ierr); 3402 } else { 3403 /* determine buffer space needed for message */ 3404 nz = 0; 3405 for (i=0; i<m; i++) { 3406 nz += ourlens[i]; 3407 } 3408 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3409 3410 /* receive message of column indices*/ 3411 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3412 } 3413 3414 /* determine column ownership if matrix is not square */ 3415 if (N != M) { 3416 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3417 else n = newMat->cmap->n; 3418 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3419 cstart = cend - n; 3420 } else { 3421 cstart = rstart; 3422 cend = rend; 3423 n = cend - cstart; 3424 } 3425 3426 /* loop over local rows, determining number of off diagonal entries */ 3427 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3428 jj = 0; 3429 for (i=0; i<m; i++) { 3430 for (j=0; j<ourlens[i]; j++) { 3431 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3432 jj++; 3433 } 3434 } 3435 3436 for (i=0; i<m; i++) { 3437 ourlens[i] -= offlens[i]; 3438 } 3439 if (!sizesset) { 3440 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3441 } 3442 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3443 3444 for (i=0; i<m; i++) { 3445 ourlens[i] += offlens[i]; 3446 } 3447 3448 if (!rank) { 3449 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3450 3451 /* read in my part of the matrix numerical values */ 3452 nz = procsnz[0]; 3453 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3454 3455 /* insert into matrix */ 3456 jj = rstart; 3457 smycols = mycols; 3458 svals = vals; 3459 for (i=0; i<m; i++) { 3460 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3461 smycols += ourlens[i]; 3462 svals += ourlens[i]; 3463 jj++; 3464 } 3465 3466 /* read in other processors and ship out */ 3467 for (i=1; i<size; i++) { 3468 nz = procsnz[i]; 3469 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3470 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3471 } 3472 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3473 } else { 3474 /* receive numeric values */ 3475 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3476 3477 /* receive message of values*/ 3478 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3479 3480 /* insert into matrix */ 3481 jj = rstart; 3482 smycols = mycols; 3483 svals = vals; 3484 for (i=0; i<m; i++) { 3485 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3486 smycols += ourlens[i]; 3487 svals += ourlens[i]; 3488 jj++; 3489 } 3490 } 3491 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3492 ierr = PetscFree(vals);CHKERRQ(ierr); 3493 ierr = PetscFree(mycols);CHKERRQ(ierr); 3494 ierr = PetscFree(rowners);CHKERRQ(ierr); 3495 3496 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3497 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3498 PetscFunctionReturn(0); 3499 } 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3503 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3504 { 3505 PetscErrorCode ierr; 3506 IS iscol_local; 3507 PetscInt csize; 3508 3509 PetscFunctionBegin; 3510 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3511 if (call == MAT_REUSE_MATRIX) { 3512 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3513 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3514 } else { 3515 PetscInt cbs; 3516 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3517 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3518 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3519 } 3520 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3521 if (call == MAT_INITIAL_MATRIX) { 3522 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3523 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3524 } 3525 PetscFunctionReturn(0); 3526 } 3527 3528 #undef __FUNCT__ 3529 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3530 /* 3531 Not great since it makes two copies of the submatrix, first an SeqAIJ 3532 in local and then by concatenating the local matrices the end result. 3533 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3534 3535 Note: This requires a sequential iscol with all indices. 3536 */ 3537 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3538 { 3539 PetscErrorCode ierr; 3540 PetscMPIInt rank,size; 3541 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3542 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3543 Mat *local,M,Mreuse; 3544 MatScalar *vwork,*aa; 3545 MPI_Comm comm = ((PetscObject)mat)->comm; 3546 Mat_SeqAIJ *aij; 3547 3548 3549 PetscFunctionBegin; 3550 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3551 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3552 3553 if (call == MAT_REUSE_MATRIX) { 3554 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3555 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3556 local = &Mreuse; 3557 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3558 } else { 3559 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3560 Mreuse = *local; 3561 ierr = PetscFree(local);CHKERRQ(ierr); 3562 } 3563 3564 /* 3565 m - number of local rows 3566 n - number of columns (same on all processors) 3567 rstart - first row in new global matrix generated 3568 */ 3569 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3570 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3571 if (call == MAT_INITIAL_MATRIX) { 3572 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3573 ii = aij->i; 3574 jj = aij->j; 3575 3576 /* 3577 Determine the number of non-zeros in the diagonal and off-diagonal 3578 portions of the matrix in order to do correct preallocation 3579 */ 3580 3581 /* first get start and end of "diagonal" columns */ 3582 if (csize == PETSC_DECIDE) { 3583 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3584 if (mglobal == n) { 
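/* the requested submatrix has as many global rows as columns, so give each
   process as many columns of the new matrix as it owns rows */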
/* square matrix */ 3585 nlocal = m; 3586 } else { 3587 nlocal = n/size + ((n % size) > rank); 3588 } 3589 } else { 3590 nlocal = csize; 3591 } 3592 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3593 rstart = rend - nlocal; 3594 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3595 3596 /* next, compute all the lengths */ 3597 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3598 olens = dlens + m; 3599 for (i=0; i<m; i++) { 3600 jend = ii[i+1] - ii[i]; 3601 olen = 0; 3602 dlen = 0; 3603 for (j=0; j<jend; j++) { 3604 if (*jj < rstart || *jj >= rend) olen++; 3605 else dlen++; 3606 jj++; 3607 } 3608 olens[i] = olen; 3609 dlens[i] = dlen; 3610 } 3611 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3612 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3613 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3614 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3615 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3616 ierr = PetscFree(dlens);CHKERRQ(ierr); 3617 } else { 3618 PetscInt ml,nl; 3619 3620 M = *newmat; 3621 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3622 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3623 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3624 /* 3625 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3626 rather than the slower MatSetValues(). 3627 */ 3628 M->was_assembled = PETSC_TRUE; 3629 M->assembled = PETSC_FALSE; 3630 } 3631 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3632 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3633 ii = aij->i; 3634 jj = aij->j; 3635 aa = aij->a; 3636 for (i=0; i<m; i++) { 3637 row = rstart + i; 3638 nz = ii[i+1] - ii[i]; 3639 cwork = jj; jj += nz; 3640 vwork = aa; aa += nz; 3641 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3642 } 3643 3644 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3645 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3646 *newmat = M; 3647 3648 /* save submatrix used in processor for next request */ 3649 if (call == MAT_INITIAL_MATRIX) { 3650 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3651 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3652 } 3653 3654 PetscFunctionReturn(0); 3655 } 3656 3657 EXTERN_C_BEGIN 3658 #undef __FUNCT__ 3659 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3660 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3661 { 3662 PetscInt m,cstart, cend,j,nnz,i,d; 3663 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3664 const PetscInt *JJ; 3665 PetscScalar *values; 3666 PetscErrorCode ierr; 3667 3668 PetscFunctionBegin; 3669 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3670 3671 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3672 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3673 m = B->rmap->n; 3674 cstart = B->cmap->rstart; 3675 cend = B->cmap->rend; 3676 rstart = B->rmap->rstart; 3677 3678 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3679 3680 #if defined(PETSC_USE_DEBUGGING) 3681 for (i=0; i<m; i++) { 3682 nnz = Ii[i+1]- Ii[i]; 3683 JJ = J + Ii[i]; 3684 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of 
columns",i,nnz); 3685 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3686 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3687 } 3688 #endif 3689 3690 for (i=0; i<m; i++) { 3691 nnz = Ii[i+1]- Ii[i]; 3692 JJ = J + Ii[i]; 3693 nnz_max = PetscMax(nnz_max,nnz); 3694 d = 0; 3695 for (j=0; j<nnz; j++) { 3696 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3697 } 3698 d_nnz[i] = d; 3699 o_nnz[i] = nnz - d; 3700 } 3701 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3702 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3703 3704 if (v) values = (PetscScalar*)v; 3705 else { 3706 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3707 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3708 } 3709 3710 for (i=0; i<m; i++) { 3711 ii = i + rstart; 3712 nnz = Ii[i+1]- Ii[i]; 3713 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3714 } 3715 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3716 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3717 3718 if (!v) { 3719 ierr = PetscFree(values);CHKERRQ(ierr); 3720 } 3721 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3722 PetscFunctionReturn(0); 3723 } 3724 EXTERN_C_END 3725 3726 #undef __FUNCT__ 3727 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3728 /*@ 3729 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3730 (the default parallel PETSc format). 3731 3732 Collective on MPI_Comm 3733 3734 Input Parameters: 3735 + B - the matrix 3736 . i - the indices into j for the start of each local row (starts with zero) 3737 . j - the column indices for each local row (starts with zero) 3738 - v - optional values in the matrix 3739 3740 Level: developer 3741 3742 Notes: 3743 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3744 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3745 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3746 3747 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3748 3749 The format which is used for the sparse matrix input, is equivalent to a 3750 row-major ordering.. 
i.e for the following matrix, the input data expected is 3751 as shown: 3752 3753 1 0 0 3754 2 0 3 P0 3755 ------- 3756 4 5 6 P1 3757 3758 Process0 [P0]: rows_owned=[0,1] 3759 i = {0,1,3} [size = nrow+1 = 2+1] 3760 j = {0,0,2} [size = nz = 6] 3761 v = {1,2,3} [size = nz = 6] 3762 3763 Process1 [P1]: rows_owned=[2] 3764 i = {0,3} [size = nrow+1 = 1+1] 3765 j = {0,1,2} [size = nz = 6] 3766 v = {4,5,6} [size = nz = 6] 3767 3768 .keywords: matrix, aij, compressed row, sparse, parallel 3769 3770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3771 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3772 @*/ 3773 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3774 { 3775 PetscErrorCode ierr; 3776 3777 PetscFunctionBegin; 3778 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3779 PetscFunctionReturn(0); 3780 } 3781 3782 #undef __FUNCT__ 3783 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3784 /*@C 3785 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3786 (the default parallel PETSc format). For good matrix assembly performance 3787 the user should preallocate the matrix storage by setting the parameters 3788 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3789 performance can be increased by more than a factor of 50. 3790 3791 Collective on MPI_Comm 3792 3793 Input Parameters: 3794 + A - the matrix 3795 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3796 (same value is used for all local rows) 3797 . d_nnz - array containing the number of nonzeros in the various rows of the 3798 DIAGONAL portion of the local submatrix (possibly different for each row) 3799 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3800 The size of this array is equal to the number of local rows, i.e 'm'. 3801 For matrices that will be factored, you must leave room for (and set) 3802 the diagonal entry even if it is zero. 3803 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3804 submatrix (same value is used for all local rows). 3805 - o_nnz - array containing the number of nonzeros in the various rows of the 3806 OFF-DIAGONAL portion of the local submatrix (possibly different for 3807 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3808 structure. The size of this array is equal to the number 3809 of local rows, i.e 'm'. 3810 3811 If the *_nnz parameter is given then the *_nz parameter is ignored 3812 3813 The AIJ format (also called the Yale sparse matrix format or 3814 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3815 storage. The stored row and column indices begin with zero. 3816 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3817 3818 The parallel matrix is partitioned such that the first m0 rows belong to 3819 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3820 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
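   For instance, a typical call sequence on each process is the following minimal sketch
   (the per-row estimates 5 and 2 are placeholders only and should be replaced by real
   counts for the problem at hand):

.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
   ... set entries with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() ...
.ve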
3821 3822 The DIAGONAL portion of the local submatrix of a processor can be defined 3823 as the submatrix which is obtained by extraction the part corresponding to 3824 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3825 first row that belongs to the processor, r2 is the last row belonging to 3826 the this processor, and c1-c2 is range of indices of the local part of a 3827 vector suitable for applying the matrix to. This is an mxn matrix. In the 3828 common case of a square matrix, the row and column ranges are the same and 3829 the DIAGONAL part is also square. The remaining portion of the local 3830 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3831 3832 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3833 3834 You can call MatGetInfo() to get information on how effective the preallocation was; 3835 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3836 You can also run with the option -info and look for messages with the string 3837 malloc in them to see if additional memory allocation was needed. 3838 3839 Example usage: 3840 3841 Consider the following 8x8 matrix with 34 non-zero values, that is 3842 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3843 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3844 as follows: 3845 3846 .vb 3847 1 2 0 | 0 3 0 | 0 4 3848 Proc0 0 5 6 | 7 0 0 | 8 0 3849 9 0 10 | 11 0 0 | 12 0 3850 ------------------------------------- 3851 13 0 14 | 15 16 17 | 0 0 3852 Proc1 0 18 0 | 19 20 21 | 0 0 3853 0 0 0 | 22 23 0 | 24 0 3854 ------------------------------------- 3855 Proc2 25 26 27 | 0 0 28 | 29 0 3856 30 0 0 | 31 32 33 | 0 34 3857 .ve 3858 3859 This can be represented as a collection of submatrices as: 3860 3861 .vb 3862 A B C 3863 D E F 3864 G H I 3865 .ve 3866 3867 Where the submatrices A,B,C are owned by proc0, D,E,F are 3868 owned by proc1, G,H,I are owned by proc2. 3869 3870 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3871 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3872 The 'M','N' parameters are 8,8, and have the same values on all procs. 3873 3874 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3875 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3876 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3877 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3878 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3879 matrix, ans [DF] as another SeqAIJ matrix. 3880 3881 When d_nz, o_nz parameters are specified, d_nz storage elements are 3882 allocated for every row of the local diagonal submatrix, and o_nz 3883 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3884 One way to choose d_nz and o_nz is to use the max nonzerors per local 3885 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3886 In this case, the values of d_nz,o_nz are: 3887 .vb 3888 proc0 : dnz = 2, o_nz = 2 3889 proc1 : dnz = 3, o_nz = 2 3890 proc2 : dnz = 1, o_nz = 4 3891 .ve 3892 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3893 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3894 for proc3. i.e we are using 12+15+10=37 storage locations to store 3895 34 values. 3896 3897 When d_nnz, o_nnz parameters are specified, the storage is specified 3898 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
3899 In the above case the values for d_nnz,o_nnz are: 3900 .vb 3901 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3902 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3903 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3904 .ve 3905 Here the space allocated is sum of all the above values i.e 34, and 3906 hence pre-allocation is perfect. 3907 3908 Level: intermediate 3909 3910 .keywords: matrix, aij, compressed row, sparse, parallel 3911 3912 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3913 MPIAIJ, MatGetInfo() 3914 @*/ 3915 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3916 { 3917 PetscErrorCode ierr; 3918 3919 PetscFunctionBegin; 3920 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3921 PetscValidType(B,1); 3922 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 #undef __FUNCT__ 3927 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3928 /*@ 3929 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3930 CSR format the local rows. 3931 3932 Collective on MPI_Comm 3933 3934 Input Parameters: 3935 + comm - MPI communicator 3936 . m - number of local rows (Cannot be PETSC_DECIDE) 3937 . n - This value should be the same as the local size used in creating the 3938 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3939 calculated if N is given) For square matrices n is almost always m. 3940 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3941 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3942 . i - row indices 3943 . j - column indices 3944 - a - matrix values 3945 3946 Output Parameter: 3947 . mat - the matrix 3948 3949 Level: intermediate 3950 3951 Notes: 3952 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3953 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3954 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3955 3956 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3957 3958 The format which is used for the sparse matrix input, is equivalent to a 3959 row-major ordering.. 
i.e for the following matrix, the input data expected is 3960 as shown: 3961 3962 1 0 0 3963 2 0 3 P0 3964 ------- 3965 4 5 6 P1 3966 3967 Process0 [P0]: rows_owned=[0,1] 3968 i = {0,1,3} [size = nrow+1 = 2+1] 3969 j = {0,0,2} [size = nz = 6] 3970 v = {1,2,3} [size = nz = 6] 3971 3972 Process1 [P1]: rows_owned=[2] 3973 i = {0,3} [size = nrow+1 = 1+1] 3974 j = {0,1,2} [size = nz = 6] 3975 v = {4,5,6} [size = nz = 6] 3976 3977 .keywords: matrix, aij, compressed row, sparse, parallel 3978 3979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3980 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3981 @*/ 3982 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3983 { 3984 PetscErrorCode ierr; 3985 3986 PetscFunctionBegin; 3987 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3988 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3989 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3990 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3991 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 3992 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3993 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3994 PetscFunctionReturn(0); 3995 } 3996 3997 #undef __FUNCT__ 3998 #define __FUNCT__ "MatCreateAIJ" 3999 /*@C 4000 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4001 (the default parallel PETSc format). For good matrix assembly performance 4002 the user should preallocate the matrix storage by setting the parameters 4003 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4004 performance can be increased by more than a factor of 50. 4005 4006 Collective on MPI_Comm 4007 4008 Input Parameters: 4009 + comm - MPI communicator 4010 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4011 This value should be the same as the local size used in creating the 4012 y vector for the matrix-vector product y = Ax. 4013 . n - This value should be the same as the local size used in creating the 4014 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4015 calculated if N is given) For square matrices n is almost always m. 4016 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4017 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4018 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4019 (same value is used for all local rows) 4020 . d_nnz - array containing the number of nonzeros in the various rows of the 4021 DIAGONAL portion of the local submatrix (possibly different for each row) 4022 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4023 The size of this array is equal to the number of local rows, i.e 'm'. 4024 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4025 submatrix (same value is used for all local rows). 4026 - o_nnz - array containing the number of nonzeros in the various rows of the 4027 OFF-DIAGONAL portion of the local submatrix (possibly different for 4028 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4029 structure. The size of this array is equal to the number 4030 of local rows, i.e 'm'. 4031 4032 Output Parameter: 4033 . 
A - the matrix 4034 4035 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4036 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4037 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4038 4039 Notes: 4040 If the *_nnz parameter is given then the *_nz parameter is ignored 4041 4042 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4043 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4044 storage requirements for this matrix. 4045 4046 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4047 processor then it must be used on all processors that share the object for 4048 that argument. 4049 4050 The user MUST specify either the local or global matrix dimensions 4051 (possibly both). 4052 4053 The parallel matrix is partitioned across processors such that the 4054 first m0 rows belong to process 0, the next m1 rows belong to 4055 process 1, the next m2 rows belong to process 2, etc., where 4056 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 4057 values corresponding to an [m x N] submatrix. 4058 4059 The columns are logically partitioned with the n0 columns belonging 4060 to the 0th partition, the next n1 columns belonging to the next 4061 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4062 4063 The DIAGONAL portion of the local submatrix on any given processor 4064 is the submatrix corresponding to the rows and columns m,n 4065 corresponding to the given processor, i.e., the diagonal matrix on 4066 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4067 etc. The remaining portion of the local submatrix [m x (N-n)] 4068 constitutes the OFF-DIAGONAL portion. The example below better 4069 illustrates this concept. 4070 4071 For a square global matrix we define each processor's diagonal portion 4072 to be its local rows and the corresponding columns (a square submatrix); 4073 each processor's off-diagonal portion encompasses the remainder of the 4074 local matrix (a rectangular submatrix). 4075 4076 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored. 4077 4078 When calling this routine with a single process communicator, a matrix of 4079 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4080 type of communicator, use the construction mechanism: 4081 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4082 4083 By default, this format uses inodes (identical nodes) when possible. 4084 We search for consecutive rows with the same nonzero structure, thereby 4085 reusing matrix information to achieve increased efficiency. 4086 4087 Options Database Keys: 4088 + -mat_no_inode - Do not use inodes 4089 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4090 - -mat_aij_oneindex - Internally use indexing starting at 1 4091 rather than 0. Note that when calling MatSetValues(), 4092 the user still MUST index entries starting at 0! 4093 4094 4095 Example usage: 4096 4097 Consider the following 8x8 matrix with 34 non-zero values that is 4098 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4099 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4100 as follows: 4101 4102 .vb 4103 1 2 0 | 0 3 0 | 0 4 4104 Proc0 0 5 6 | 7 0 0 | 8 0 4105 9 0 10 | 11 0 0 | 12 0 4106 ------------------------------------- 4107 13 0 14 | 15 16 17 | 0 0 4108 Proc1 0 18 0 | 19 20 21 | 0 0 4109 0 0 0 | 22 23 0 | 24 0 4110 ------------------------------------- 4111 Proc2 25 26 27 | 0 0 28 | 29 0 4112 30 0 0 | 31 32 33 | 0 34 4113 .ve 4114 4115 This can be represented as a collection of submatrices as: 4116 4117 .vb 4118 A B C 4119 D E F 4120 G H I 4121 .ve 4122 4123 Here the submatrices A,B,C are owned by proc0, D,E,F are 4124 owned by proc1, and G,H,I are owned by proc2. 4125 4126 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4127 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4128 The 'M','N' parameters are 8,8, and have the same values on all procs. 4129 4130 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4131 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4132 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4133 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4134 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4135 matrix and [DF] as another SeqAIJ matrix. 4136 4137 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4138 allocated for every row of the local diagonal submatrix, and o_nz 4139 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4140 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4141 rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4142 In this case, the values of d_nz,o_nz are: 4143 .vb 4144 proc0 : d_nz = 2, o_nz = 2 4145 proc1 : d_nz = 3, o_nz = 2 4146 proc2 : d_nz = 1, o_nz = 4 4147 .ve 4148 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4149 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4150 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4151 34 values. 4152 4153 When the d_nnz, o_nnz parameters are specified, the storage is specified 4154 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4155 In the above case the values for d_nnz,o_nnz are: 4156 .vb 4157 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4158 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4159 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4160 .ve 4161 Here the space allocated is the sum of all the above values, i.e., 34, and 4162 hence pre-allocation is perfect. 
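   As an illustrative sketch (not a complete program), process 0 in the example above
   could create its part of the matrix with

.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   Mat      A;
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve

   with the other processes making the same collective call using their own local sizes
   and nonzero-count arrays.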
4163 4164 Level: intermediate 4165 4166 .keywords: matrix, aij, compressed row, sparse, parallel 4167 4168 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4169 MPIAIJ, MatCreateMPIAIJWithArrays() 4170 @*/ 4171 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4172 { 4173 PetscErrorCode ierr; 4174 PetscMPIInt size; 4175 4176 PetscFunctionBegin; 4177 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4178 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4180 if (size > 1) { 4181 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4182 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4183 } else { 4184 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4185 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4186 } 4187 PetscFunctionReturn(0); 4188 } 4189 4190 #undef __FUNCT__ 4191 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4192 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4193 { 4194 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4195 4196 PetscFunctionBegin; 4197 *Ad = a->A; 4198 *Ao = a->B; 4199 *colmap = a->garray; 4200 PetscFunctionReturn(0); 4201 } 4202 4203 #undef __FUNCT__ 4204 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4205 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4206 { 4207 PetscErrorCode ierr; 4208 PetscInt i; 4209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4210 4211 PetscFunctionBegin; 4212 if (coloring->ctype == IS_COLORING_GLOBAL) { 4213 ISColoringValue *allcolors,*colors; 4214 ISColoring ocoloring; 4215 4216 /* set coloring for diagonal portion */ 4217 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4218 4219 /* set coloring for off-diagonal portion */ 4220 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4221 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4222 for (i=0; i<a->B->cmap->n; i++) { 4223 colors[i] = allcolors[a->garray[i]]; 4224 } 4225 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4226 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4227 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4228 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4229 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4230 ISColoringValue *colors; 4231 PetscInt *larray; 4232 ISColoring ocoloring; 4233 4234 /* set coloring for diagonal portion */ 4235 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4236 for (i=0; i<a->A->cmap->n; i++) { 4237 larray[i] = i + A->cmap->rstart; 4238 } 4239 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4240 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4241 for (i=0; i<a->A->cmap->n; i++) { 4242 colors[i] = coloring->colors[larray[i]]; 4243 } 4244 ierr = PetscFree(larray);CHKERRQ(ierr); 4245 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4246 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4247 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4248 4249 /* set coloring for off-diagonal portion */ 4250 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4251 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4252 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4253 for (i=0; i<a->B->cmap->n; i++) { 4254 colors[i] = coloring->colors[larray[i]]; 4255 } 4256 ierr = PetscFree(larray);CHKERRQ(ierr); 4257 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4258 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4259 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4260 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4261 4262 PetscFunctionReturn(0); 4263 } 4264 4265 #if defined(PETSC_HAVE_ADIC) 4266 #undef __FUNCT__ 4267 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4268 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4269 { 4270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4271 PetscErrorCode ierr; 4272 4273 PetscFunctionBegin; 4274 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4275 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4276 PetscFunctionReturn(0); 4277 } 4278 #endif 4279 4280 #undef __FUNCT__ 4281 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4282 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4283 { 4284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4285 PetscErrorCode ierr; 4286 4287 PetscFunctionBegin; 4288 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4289 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4290 PetscFunctionReturn(0); 4291 } 4292 4293 #undef __FUNCT__ 4294 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4295 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4296 { 4297 PetscErrorCode ierr; 4298 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4299 PetscInt *indx; 4300 4301 PetscFunctionBegin; 4302 /* This routine will ONLY return MPIAIJ type matrix */ 4303 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4304 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4305 if (n == PETSC_DECIDE){ 4306 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4307 } 4308 /* Check sum(n) = N */ 4309 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4310 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4311 4312 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4313 rstart -= m; 4314 4315 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4316 for (i=0;i<m;i++) { 4317 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4318 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4319 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4320 } 4321 4322 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4323 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4324 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4325 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4326 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4327 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 #undef __FUNCT__ 4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4333 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscInt 
m,N,i,rstart,nnz,Ii; 4337 PetscInt *indx; 4338 PetscScalar *values; 4339 4340 PetscFunctionBegin; 4341 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4342 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4343 for (i=0;i<m;i++) { 4344 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4345 Ii = i + rstart; 4346 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4347 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4348 } 4349 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4350 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4351 PetscFunctionReturn(0); 4352 } 4353 4354 #undef __FUNCT__ 4355 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4356 /*@ 4357 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4358 matrices from each processor 4359 4360 Collective on MPI_Comm 4361 4362 Input Parameters: 4363 + comm - the communicators the parallel matrix will live on 4364 . inmat - the input sequential matrices 4365 . n - number of local columns (or PETSC_DECIDE) 4366 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4367 4368 Output Parameter: 4369 . outmat - the parallel matrix generated 4370 4371 Level: advanced 4372 4373 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4374 4375 @*/ 4376 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4377 { 4378 PetscErrorCode ierr; 4379 4380 PetscFunctionBegin; 4381 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4382 if (scall == MAT_INITIAL_MATRIX){ 4383 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4384 } 4385 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4386 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 4390 #undef __FUNCT__ 4391 #define __FUNCT__ "MatFileSplit" 4392 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4393 { 4394 PetscErrorCode ierr; 4395 PetscMPIInt rank; 4396 PetscInt m,N,i,rstart,nnz; 4397 size_t len; 4398 const PetscInt *indx; 4399 PetscViewer out; 4400 char *name; 4401 Mat B; 4402 const PetscScalar *values; 4403 4404 PetscFunctionBegin; 4405 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4406 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4407 /* Should this be the type of the diagonal block of A? 
*/ 4408 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4409 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4410 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4411 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4412 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4413 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4414 for (i=0;i<m;i++) { 4415 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4416 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4417 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4418 } 4419 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4420 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4421 4422 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4423 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4424 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4425 sprintf(name,"%s.%d",outfile,rank); 4426 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4427 ierr = PetscFree(name); 4428 ierr = MatView(B,out);CHKERRQ(ierr); 4429 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4430 ierr = MatDestroy(&B);CHKERRQ(ierr); 4431 PetscFunctionReturn(0); 4432 } 4433 4434 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4435 #undef __FUNCT__ 4436 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4437 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4438 { 4439 PetscErrorCode ierr; 4440 Mat_Merge_SeqsToMPI *merge; 4441 PetscContainer container; 4442 4443 PetscFunctionBegin; 4444 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4445 if (container) { 4446 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4447 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4448 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4449 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4450 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4451 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4452 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4453 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4454 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4455 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4456 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4457 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4458 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4459 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4460 ierr = PetscFree(merge);CHKERRQ(ierr); 4461 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4462 } 4463 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4464 PetscFunctionReturn(0); 4465 } 4466 4467 #include <../src/mat/utils/freespace.h> 4468 #include <petscbt.h> 4469 4470 #undef __FUNCT__ 4471 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4472 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4473 { 4474 PetscErrorCode ierr; 4475 MPI_Comm comm=((PetscObject)mpimat)->comm; 4476 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4477 PetscMPIInt size,rank,taga,*len_s; 4478 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4479 PetscInt proc,m; 4480 PetscInt **buf_ri,**buf_rj; 4481 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4482 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4483 MPI_Request *s_waits,*r_waits; 4484 MPI_Status *status; 4485 MatScalar *aa=a->a; 4486 MatScalar **abuf_r,*ba_i; 4487 Mat_Merge_SeqsToMPI *merge; 4488 PetscContainer container; 4489 4490 PetscFunctionBegin; 4491 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4492 4493 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4494 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4495 4496 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4497 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4498 4499 bi = merge->bi; 4500 bj = merge->bj; 4501 buf_ri = merge->buf_ri; 4502 buf_rj = merge->buf_rj; 4503 4504 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4505 owners = merge->rowmap->range; 4506 len_s = merge->len_s; 4507 4508 /* send and recv matrix values */ 4509 /*-----------------------------*/ 4510 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4511 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4512 4513 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4514 for (proc=0,k=0; proc<size; proc++){ 4515 if (!len_s[proc]) continue; 4516 i = owners[proc]; 4517 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4518 k++; 4519 } 4520 4521 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4522 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4523 ierr = PetscFree(status);CHKERRQ(ierr); 4524 4525 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4526 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4527 4528 /* insert mat values of mpimat */ 4529 /*----------------------------*/ 4530 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4531 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4532 4533 for (k=0; k<merge->nrecv; k++){ 4534 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4535 nrows = *(buf_ri_k[k]); 4536 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4537 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4538 } 4539 4540 /* set values of ba */ 4541 m = merge->rowmap->n; 4542 for (i=0; i<m; i++) { 4543 arow = owners[rank] + i; 4544 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4545 bnzi = bi[i+1] - bi[i]; 4546 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4547 4548 /* add local non-zero vals of this proc's seqmat into ba */ 4549 anzi = ai[arow+1] - ai[arow]; 4550 aj = a->j + ai[arow]; 4551 aa = a->a + ai[arow]; 4552 nextaj = 0; 4553 for (j=0; nextaj<anzi; j++){ 4554 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4555 ba_i[j] += aa[nextaj++]; 4556 } 4557 } 4558 4559 /* add received vals into ba */ 4560 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4561 /* i-th row */ 4562 if (i == *nextrow[k]) { 4563 anzi = *(nextai[k]+1) - *nextai[k]; 4564 aj = buf_rj[k] + *(nextai[k]); 4565 aa = abuf_r[k] + *(nextai[k]); 4566 nextaj = 0; 4567 for (j=0; nextaj<anzi; j++){ 4568 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4569 ba_i[j] += aa[nextaj++]; 4570 } 4571 } 4572 nextrow[k]++; nextai[k]++; 4573 } 4574 } 4575 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4576 } 4577 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4578 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4579 4580 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4581 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4582 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4583 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4584 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4592 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4593 { 4594 PetscErrorCode ierr; 4595 Mat B_mpi; 4596 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4597 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4598 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4599 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4600 PetscInt len,proc,*dnz,*onz,bs,cbs; 4601 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4602 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4603 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4604 MPI_Status *status; 4605 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4606 PetscBT lnkbt; 4607 Mat_Merge_SeqsToMPI *merge; 4608 PetscContainer container; 4609 4610 PetscFunctionBegin; 4611 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4612 4613 /* make sure it is a PETSc comm */ 4614 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4616 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4617 4618 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4619 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4620 4621 /* determine row ownership */ 4622 /*---------------------------------------------------------*/ 4623 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4624 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4625 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4626 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4627 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4628 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4629 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4630 4631 m = merge->rowmap->n; 4632 M = merge->rowmap->N; 4633 owners = merge->rowmap->range; 4634 4635 /* determine the number of messages to send, their lengths */ 4636 /*---------------------------------------------------------*/ 4637 len_s = merge->len_s; 4638 4639 len = 0; /* length of buf_si[] */ 4640 merge->nsend = 0; 4641 for (proc=0; proc<size; proc++){ 4642 len_si[proc] = 0; 4643 if (proc == rank){ 4644 len_s[proc] = 0; 4645 } else { 4646 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4647 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4648 } 4649 if (len_s[proc]) { 4650 merge->nsend++; 4651 nrows = 0; 4652 for (i=owners[proc]; i<owners[proc+1]; i++){ 4653 if (ai[i+1] > ai[i]) nrows++; 4654 } 4655 len_si[proc] = 2*(nrows+1); 4656 len += len_si[proc]; 4657 } 4658 } 4659 4660 /* determine the number and length of messages to receive for ij-structure */ 4661 /*-------------------------------------------------------------------------*/ 4662 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4663 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4664 4665 /* post the Irecv of j-structure */ 4666 /*-------------------------------*/ 4667 ierr = 
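/*
     Communication pattern of the symbolic phase: rows of the merged matrix are partitioned by
     merge->rowmap, so each process ships the part of its sequential matrix that falls in another
     process's row range.  The column indices (j-structure) are sent first; they are followed by a
     compressed row description (i-structure) holding, per message, the number of nonempty rows,
     their local row indices, and the offset of each row in the j-structure.
  */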
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4668 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4669 4670 /* post the Isend of j-structure */ 4671 /*--------------------------------*/ 4672 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4673 4674 for (proc=0, k=0; proc<size; proc++){ 4675 if (!len_s[proc]) continue; 4676 i = owners[proc]; 4677 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4678 k++; 4679 } 4680 4681 /* receives and sends of j-structure are complete */ 4682 /*------------------------------------------------*/ 4683 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4684 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4685 4686 /* send and recv i-structure */ 4687 /*---------------------------*/ 4688 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4689 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4690 4691 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4692 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4693 for (proc=0,k=0; proc<size; proc++){ 4694 if (!len_s[proc]) continue; 4695 /* form outgoing message for i-structure: 4696 buf_si[0]: nrows to be sent 4697 [1:nrows]: row index (global) 4698 [nrows+1:2*nrows+1]: i-structure index 4699 */ 4700 /*-------------------------------------------*/ 4701 nrows = len_si[proc]/2 - 1; 4702 buf_si_i = buf_si + nrows+1; 4703 buf_si[0] = nrows; 4704 buf_si_i[0] = 0; 4705 nrows = 0; 4706 for (i=owners[proc]; i<owners[proc+1]; i++){ 4707 anzi = ai[i+1] - ai[i]; 4708 if (anzi) { 4709 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4710 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4711 nrows++; 4712 } 4713 } 4714 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4715 k++; 4716 buf_si += len_si[proc]; 4717 } 4718 4719 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4720 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4721 4722 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4723 for (i=0; i<merge->nrecv; i++){ 4724 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4725 } 4726 4727 ierr = PetscFree(len_si);CHKERRQ(ierr); 4728 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4729 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4730 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4731 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4732 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4733 ierr = PetscFree(status);CHKERRQ(ierr); 4734 4735 /* compute a local seq matrix in each processor */ 4736 /*----------------------------------------------*/ 4737 /* allocate bi array and free space for accumulating nonzero column info */ 4738 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4739 bi[0] = 0; 4740 4741 /* create and initialize a linked list */ 4742 nlnk = N+1; 4743 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4744 4745 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4746 len = 0; 4747 len = ai[owners[rank+1]] - ai[owners[rank]]; 4748 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4749 current_space = free_space; 4750 4751 /* determine 
symbolic info for each local row */
4752 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4753
4754 for (k=0; k<merge->nrecv; k++){
4755 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4756 nrows = *buf_ri_k[k];
4757 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4758 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4759 }
4760
4761 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4762 len = 0;
4763 for (i=0;i<m;i++) {
4764 bnzi = 0;
4765 /* add local non-zero cols of this proc's seqmat into lnk */
4766 arow = owners[rank] + i;
4767 anzi = ai[arow+1] - ai[arow];
4768 aj = a->j + ai[arow];
4769 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4770 bnzi += nlnk;
4771 /* add received col data into lnk */
4772 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4773 if (i == *nextrow[k]) { /* i-th row */
4774 anzi = *(nextai[k]+1) - *nextai[k];
4775 aj = buf_rj[k] + *nextai[k];
4776 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4777 bnzi += nlnk;
4778 nextrow[k]++; nextai[k]++;
4779 }
4780 }
4781 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4782
4783 /* if free space is not available, make more free space */
4784 if (current_space->local_remaining<bnzi) {
4785 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4786 nspacedouble++;
4787 }
4788 /* copy data into free space, then initialize lnk */
4789 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4790 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4791
4792 current_space->array += bnzi;
4793 current_space->local_used += bnzi;
4794 current_space->local_remaining -= bnzi;
4795
4796 bi[i+1] = bi[i] + bnzi;
4797 }
4798
4799 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4800
4801 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4802 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4803 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4804
4805 /* create symbolic parallel matrix B_mpi */
4806 /*---------------------------------------*/
4807 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4808 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4809 if (n==PETSC_DECIDE) {
4810 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4811 } else {
4812 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4813 }
4814 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4815 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4816 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4817 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4818 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4819
4820 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4821 B_mpi->assembled = PETSC_FALSE;
4822 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4823 merge->bi = bi;
4824 merge->bj = bj;
4825 merge->buf_ri = buf_ri;
4826 merge->buf_rj = buf_rj;
4827 merge->coi = PETSC_NULL;
4828 merge->coj = PETSC_NULL;
4829 merge->owners_co = PETSC_NULL;
4830
4831 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4832
4833 /* attach the supporting struct to B_mpi for reuse */
4834 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4835 ierr =
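/*
     The merge metadata is attached to B_mpi through a PetscContainer so that
     MatCreateMPIAIJSumSeqAIJNumeric() (including repeated calls with MAT_REUSE_MATRIX) can recover
     it later.  A minimal sketch of the retrieval side, assuming C is a matrix produced by this
     routine:

        PetscContainer       cont;
        Mat_Merge_SeqsToMPI *mrg;
        ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject*)&cont);CHKERRQ(ierr);
        ierr = PetscContainerGetPointer(cont,(void**)&mrg);CHKERRQ(ierr);
  */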
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4836 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4837 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4838 *mpimat = B_mpi;
4839
4840 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4841 PetscFunctionReturn(0);
4842 }
4843
4844 #undef __FUNCT__
4845 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4846 /*@C
4847 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4848 matrices from each processor
4849
4850 Collective on MPI_Comm
4851
4852 Input Parameters:
4853 + comm - the communicator the parallel matrix will live on
4854 . seqmat - the input sequential matrix
4855 . m - number of local rows (or PETSC_DECIDE)
4856 . n - number of local columns (or PETSC_DECIDE)
4857 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4858
4859 Output Parameter:
4860 . mpimat - the parallel matrix generated
4861
4862 Level: advanced
4863
4864 Notes:
4865 The dimensions of the sequential matrix in each processor MUST be the same.
4866 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4867 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4868 @*/
4869 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4870 {
4871 PetscErrorCode ierr;
4872 PetscMPIInt size;
4873
4874 PetscFunctionBegin;
4875 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4876 if (size == 1){
4877 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4878 if (scall == MAT_INITIAL_MATRIX){
4879 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4880 } else {
4881 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4882 }
4883 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4884 PetscFunctionReturn(0);
4885 }
4886 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4887 if (scall == MAT_INITIAL_MATRIX){
4888 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4889 }
4890 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4891 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4892 PetscFunctionReturn(0);
4893 }
4894
4895 #undef __FUNCT__
4896 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4897 /*@
4898 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
4899 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4900 with MatGetSize()
4901
4902 Not Collective
4903
4904 Input Parameters:
4905 + A - the matrix
4906 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4907
4908 Output Parameter:
4909 .
A_loc - the local sequential matrix generated 4910 4911 Level: developer 4912 4913 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4914 4915 @*/ 4916 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4917 { 4918 PetscErrorCode ierr; 4919 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4920 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4921 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4922 MatScalar *aa=a->a,*ba=b->a,*cam; 4923 PetscScalar *ca; 4924 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4925 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4926 PetscBool match; 4927 4928 PetscFunctionBegin; 4929 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4930 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4931 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4932 if (scall == MAT_INITIAL_MATRIX){ 4933 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4934 ci[0] = 0; 4935 for (i=0; i<am; i++){ 4936 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4937 } 4938 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4939 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4940 k = 0; 4941 for (i=0; i<am; i++) { 4942 ncols_o = bi[i+1] - bi[i]; 4943 ncols_d = ai[i+1] - ai[i]; 4944 /* off-diagonal portion of A */ 4945 for (jo=0; jo<ncols_o; jo++) { 4946 col = cmap[*bj]; 4947 if (col >= cstart) break; 4948 cj[k] = col; bj++; 4949 ca[k++] = *ba++; 4950 } 4951 /* diagonal portion of A */ 4952 for (j=0; j<ncols_d; j++) { 4953 cj[k] = cstart + *aj++; 4954 ca[k++] = *aa++; 4955 } 4956 /* off-diagonal portion of A */ 4957 for (j=jo; j<ncols_o; j++) { 4958 cj[k] = cmap[*bj++]; 4959 ca[k++] = *ba++; 4960 } 4961 } 4962 /* put together the new matrix */ 4963 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4964 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4965 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4966 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4967 mat->free_a = PETSC_TRUE; 4968 mat->free_ij = PETSC_TRUE; 4969 mat->nonew = 0; 4970 } else if (scall == MAT_REUSE_MATRIX){ 4971 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4972 ci = mat->i; cj = mat->j; cam = mat->a; 4973 for (i=0; i<am; i++) { 4974 /* off-diagonal portion of A */ 4975 ncols_o = bi[i+1] - bi[i]; 4976 for (jo=0; jo<ncols_o; jo++) { 4977 col = cmap[*bj]; 4978 if (col >= cstart) break; 4979 *cam++ = *ba++; bj++; 4980 } 4981 /* diagonal portion of A */ 4982 ncols_d = ai[i+1] - ai[i]; 4983 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4984 /* off-diagonal portion of A */ 4985 for (j=jo; j<ncols_o; j++) { 4986 *cam++ = *ba++; bj++; 4987 } 4988 } 4989 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4990 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4991 PetscFunctionReturn(0); 4992 } 4993 4994 #undef __FUNCT__ 4995 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4996 /*@C 4997 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4998 4999 Not Collective 5000 5001 Input Parameters: 5002 + A - the matrix 5003 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5004 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5005 5006 Output Parameter: 5007 . 
A_loc - the local sequential matrix generated 5008 5009 Level: developer 5010 5011 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5012 5013 @*/ 5014 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5015 { 5016 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5017 PetscErrorCode ierr; 5018 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5019 IS isrowa,iscola; 5020 Mat *aloc; 5021 PetscBool match; 5022 5023 PetscFunctionBegin; 5024 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5025 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5026 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5027 if (!row){ 5028 start = A->rmap->rstart; end = A->rmap->rend; 5029 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5030 } else { 5031 isrowa = *row; 5032 } 5033 if (!col){ 5034 start = A->cmap->rstart; 5035 cmap = a->garray; 5036 nzA = a->A->cmap->n; 5037 nzB = a->B->cmap->n; 5038 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5039 ncols = 0; 5040 for (i=0; i<nzB; i++) { 5041 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5042 else break; 5043 } 5044 imark = i; 5045 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5046 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5047 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5048 } else { 5049 iscola = *col; 5050 } 5051 if (scall != MAT_INITIAL_MATRIX){ 5052 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5053 aloc[0] = *A_loc; 5054 } 5055 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5056 *A_loc = aloc[0]; 5057 ierr = PetscFree(aloc);CHKERRQ(ierr); 5058 if (!row){ 5059 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5060 } 5061 if (!col){ 5062 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5063 } 5064 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5065 PetscFunctionReturn(0); 5066 } 5067 5068 #undef __FUNCT__ 5069 #define __FUNCT__ "MatGetBrowsOfAcols" 5070 /*@C 5071 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5072 5073 Collective on Mat 5074 5075 Input Parameters: 5076 + A,B - the matrices in mpiaij format 5077 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5078 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5079 5080 Output Parameter: 5081 + rowb, colb - index sets of rows and columns of B to extract 5082 - B_seq - the sequential matrix generated 5083 5084 Level: developer 5085 5086 @*/ 5087 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5088 { 5089 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5090 PetscErrorCode ierr; 5091 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5092 IS isrowb,iscolb; 5093 Mat *bseq=PETSC_NULL; 5094 5095 PetscFunctionBegin; 5096 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5097 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5098 } 5099 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5100 5101 if (scall == MAT_INITIAL_MATRIX){ 5102 start = A->cmap->rstart; 5103 cmap = a->garray; 5104 nzA = a->A->cmap->n; 5105 nzB = a->B->cmap->n; 5106 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5107 ncols = 0; 5108 for (i=0; i<nzB; i++) { /* row < local row index */ 5109 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5110 else break; 5111 } 5112 imark = i; 5113 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5114 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5115 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5116 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5117 } else { 5118 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5119 isrowb = *rowb; iscolb = *colb; 5120 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5121 bseq[0] = *B_seq; 5122 } 5123 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5124 *B_seq = bseq[0]; 5125 ierr = PetscFree(bseq);CHKERRQ(ierr); 5126 if (!rowb){ 5127 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5128 } else { 5129 *rowb = isrowb; 5130 } 5131 if (!colb){ 5132 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5133 } else { 5134 *colb = iscolb; 5135 } 5136 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5137 PetscFunctionReturn(0); 5138 } 5139 5140 #undef __FUNCT__ 5141 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5142 /* 5143 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5144 of the OFF-DIAGONAL portion of local A 5145 5146 Collective on Mat 5147 5148 Input Parameters: 5149 + A,B - the matrices in mpiaij format 5150 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5151 5152 Output Parameter: 5153 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5154 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5155 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5156 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5157 5158 Level: developer 5159 5160 */ 5161 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5162 { 5163 VecScatter_MPI_General *gen_to,*gen_from; 5164 PetscErrorCode ierr; 5165 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5166 Mat_SeqAIJ *b_oth; 5167 VecScatter ctx=a->Mvctx; 5168 MPI_Comm comm=((PetscObject)ctx)->comm; 5169 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5170 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5171 PetscScalar *rvalues,*svalues; 5172 MatScalar *b_otha,*bufa,*bufA; 5173 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5174 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5175 MPI_Status *sstatus,rstatus; 5176 PetscMPIInt jj; 5177 PetscInt *cols,sbs,rbs; 5178 PetscScalar *vals; 5179 5180 PetscFunctionBegin; 5181 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5182 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5183 } 5184 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5185 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5186 5187 gen_to = (VecScatter_MPI_General*)ctx->todata; 5188 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5189 rvalues = gen_from->values; /* holds the length of receiving row */ 5190 svalues = gen_to->values; /* holds the length of sending row */ 5191 nrecvs = gen_from->n; 5192 nsends = gen_to->n; 5193 5194 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5195 srow = gen_to->indices; /* local row index to be sent */ 5196 sstarts = gen_to->starts; 5197 sprocs = gen_to->procs; 5198 sstatus = gen_to->sstatus; 5199 sbs = gen_to->bs; 5200 rstarts = gen_from->starts; 5201 rprocs = gen_from->procs; 5202 rbs = gen_from->bs; 5203 5204 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5205 if (scall == MAT_INITIAL_MATRIX){ 5206 /* i-array */ 5207 /*---------*/ 5208 /* post receives */ 5209 for (i=0; i<nrecvs; i++){ 5210 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5211 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5212 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5213 } 5214 5215 /* pack the outgoing message */ 5216 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5217 sstartsj[0] = 0; rstartsj[0] = 0; 5218 len = 0; /* total length of j or a array to be sent */ 5219 k = 0; 5220 for (i=0; i<nsends; i++){ 5221 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5222 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5223 for (j=0; j<nrows; j++) { 5224 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5225 for (l=0; l<sbs; l++){ 5226 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5227 rowlen[j*sbs+l] = ncols; 5228 len += ncols; 5229 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5230 } 5231 k++; 5232 } 5233 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5234 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5235 } 5236 /* 
recvs and sends of i-array are completed */ 5237 i = nrecvs; 5238 while (i--) { 5239 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5240 } 5241 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5242 5243 /* allocate buffers for sending j and a arrays */ 5244 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5245 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5246 5247 /* create i-array of B_oth */ 5248 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5249 b_othi[0] = 0; 5250 len = 0; /* total length of j or a array to be received */ 5251 k = 0; 5252 for (i=0; i<nrecvs; i++){ 5253 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5254 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5255 for (j=0; j<nrows; j++) { 5256 b_othi[k+1] = b_othi[k] + rowlen[j]; 5257 len += rowlen[j]; k++; 5258 } 5259 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5260 } 5261 5262 /* allocate space for j and a arrrays of B_oth */ 5263 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5264 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5265 5266 /* j-array */ 5267 /*---------*/ 5268 /* post receives of j-array */ 5269 for (i=0; i<nrecvs; i++){ 5270 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5271 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5272 } 5273 5274 /* pack the outgoing message j-array */ 5275 k = 0; 5276 for (i=0; i<nsends; i++){ 5277 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5278 bufJ = bufj+sstartsj[i]; 5279 for (j=0; j<nrows; j++) { 5280 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5281 for (ll=0; ll<sbs; ll++){ 5282 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5283 for (l=0; l<ncols; l++){ 5284 *bufJ++ = cols[l]; 5285 } 5286 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5287 } 5288 } 5289 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5290 } 5291 5292 /* recvs and sends of j-array are completed */ 5293 i = nrecvs; 5294 while (i--) { 5295 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5296 } 5297 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5298 } else if (scall == MAT_REUSE_MATRIX){ 5299 sstartsj = *startsj_s; 5300 rstartsj = *startsj_r; 5301 bufa = *bufa_ptr; 5302 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5303 b_otha = b_oth->a; 5304 } else { 5305 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5306 } 5307 5308 /* a-array */ 5309 /*---------*/ 5310 /* post receives of a-array */ 5311 for (i=0; i<nrecvs; i++){ 5312 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5313 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message a-array */ 5317 k = 0; 5318 for (i=0; i<nsends; i++){ 5319 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5320 bufA = bufa+sstartsj[i]; 5321 for (j=0; j<nrows; j++) { 5322 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5323 for (ll=0; ll<sbs; ll++){ 5324 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5325 for (l=0; l<ncols; l++){ 5326 *bufA++ = vals[l]; 5327 } 5328 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5329 } 5330 } 5331 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5332 } 5333 /* recvs and sends of a-array are completed */ 5334 i = nrecvs; 5335 while (i--) { 5336 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5337 } 5338 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5339 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5340 5341 if (scall == MAT_INITIAL_MATRIX){ 5342 /* put together the new matrix */ 5343 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5344 5345 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5346 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5347 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5348 b_oth->free_a = PETSC_TRUE; 5349 b_oth->free_ij = PETSC_TRUE; 5350 b_oth->nonew = 0; 5351 5352 ierr = PetscFree(bufj);CHKERRQ(ierr); 5353 if (!startsj_s || !bufa_ptr){ 5354 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5355 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5356 } else { 5357 *startsj_s = sstartsj; 5358 *startsj_r = rstartsj; 5359 *bufa_ptr = bufa; 5360 } 5361 } 5362 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5363 PetscFunctionReturn(0); 5364 } 5365 5366 #undef __FUNCT__ 5367 #define __FUNCT__ "MatGetCommunicationStructs" 5368 /*@C 5369 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5370 5371 Not Collective 5372 5373 Input Parameters: 5374 . A - The matrix in mpiaij format 5375 5376 Output Parameter: 5377 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5378 . 
colmap - A map from global column index to local index into lvec 5379 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5380 5381 Level: developer 5382 5383 @*/ 5384 #if defined (PETSC_USE_CTABLE) 5385 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5386 #else 5387 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5388 #endif 5389 { 5390 Mat_MPIAIJ *a; 5391 5392 PetscFunctionBegin; 5393 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5394 PetscValidPointer(lvec, 2); 5395 PetscValidPointer(colmap, 3); 5396 PetscValidPointer(multScatter, 4); 5397 a = (Mat_MPIAIJ *) A->data; 5398 if (lvec) *lvec = a->lvec; 5399 if (colmap) *colmap = a->colmap; 5400 if (multScatter) *multScatter = a->Mvctx; 5401 PetscFunctionReturn(0); 5402 } 5403 5404 EXTERN_C_BEGIN 5405 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5406 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5407 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5408 EXTERN_C_END 5409 5410 #undef __FUNCT__ 5411 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5412 /* 5413 Computes (B'*A')' since computing B*A directly is untenable 5414 5415 n p p 5416 ( ) ( ) ( ) 5417 m ( A ) * n ( B ) = m ( C ) 5418 ( ) ( ) ( ) 5419 5420 */ 5421 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5422 { 5423 PetscErrorCode ierr; 5424 Mat At,Bt,Ct; 5425 5426 PetscFunctionBegin; 5427 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5428 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5429 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5430 ierr = MatDestroy(&At);CHKERRQ(ierr); 5431 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5432 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5433 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5434 PetscFunctionReturn(0); 5435 } 5436 5437 #undef __FUNCT__ 5438 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5439 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5440 { 5441 PetscErrorCode ierr; 5442 PetscInt m=A->rmap->n,n=B->cmap->n; 5443 Mat Cmat; 5444 5445 PetscFunctionBegin; 5446 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5447 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5448 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5449 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5450 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5451 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5452 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5453 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5454 *C = Cmat; 5455 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5456 PetscFunctionReturn(0); 5457 } 5458 5459 /* ----------------------------------------------------------------*/ 5460 #undef __FUNCT__ 5461 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5462 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5463 { 5464 PetscErrorCode ierr; 5465 5466 PetscFunctionBegin; 5467 if (scall == MAT_INITIAL_MATRIX){ 5468 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5469 } 5470 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
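/*
     This kernel is intended to be reached through the generic MatMatMult() interface once it has
     been composed under the name "MatMatMult_mpidense_mpiaij_C" in MatCreate_MPIAIJ() below.
     A usage sketch, where D is a hypothetical MPIDENSE matrix and S an MPIAIJ matrix of
     compatible sizes:

        Mat P;
        ierr = MatMatMult(D,S,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&P);CHKERRQ(ierr);
  */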
5471 PetscFunctionReturn(0); 5472 } 5473 5474 EXTERN_C_BEGIN 5475 #if defined(PETSC_HAVE_MUMPS) 5476 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5477 #endif 5478 #if defined(PETSC_HAVE_PASTIX) 5479 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5480 #endif 5481 #if defined(PETSC_HAVE_SUPERLU_DIST) 5482 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5483 #endif 5484 #if defined(PETSC_HAVE_SPOOLES) 5485 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5486 #endif 5487 EXTERN_C_END 5488 5489 /*MC 5490 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5491 5492 Options Database Keys: 5493 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5494 5495 Level: beginner 5496 5497 .seealso: MatCreateAIJ() 5498 M*/ 5499 5500 EXTERN_C_BEGIN 5501 #undef __FUNCT__ 5502 #define __FUNCT__ "MatCreate_MPIAIJ" 5503 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5504 { 5505 Mat_MPIAIJ *b; 5506 PetscErrorCode ierr; 5507 PetscMPIInt size; 5508 5509 PetscFunctionBegin; 5510 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5511 5512 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5513 B->data = (void*)b; 5514 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5515 B->assembled = PETSC_FALSE; 5516 5517 B->insertmode = NOT_SET_VALUES; 5518 b->size = size; 5519 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5520 5521 /* build cache for off array entries formed */ 5522 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5523 b->donotstash = PETSC_FALSE; 5524 b->colmap = 0; 5525 b->garray = 0; 5526 b->roworiented = PETSC_TRUE; 5527 5528 /* stuff used for matrix vector multiply */ 5529 b->lvec = PETSC_NULL; 5530 b->Mvctx = PETSC_NULL; 5531 5532 /* stuff for MatGetRow() */ 5533 b->rowindices = 0; 5534 b->rowvalues = 0; 5535 b->getrowactive = PETSC_FALSE; 5536 5537 #if defined(PETSC_HAVE_SPOOLES) 5538 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5539 "MatGetFactor_mpiaij_spooles", 5540 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5541 #endif 5542 #if defined(PETSC_HAVE_MUMPS) 5543 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5544 "MatGetFactor_aij_mumps", 5545 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5546 #endif 5547 #if defined(PETSC_HAVE_PASTIX) 5548 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5549 "MatGetFactor_mpiaij_pastix", 5550 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5551 #endif 5552 #if defined(PETSC_HAVE_SUPERLU_DIST) 5553 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5554 "MatGetFactor_mpiaij_superlu_dist", 5555 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5556 #endif 5557 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5558 "MatStoreValues_MPIAIJ", 5559 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5560 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5561 "MatRetrieveValues_MPIAIJ", 5562 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5564 "MatGetDiagonalBlock_MPIAIJ", 5565 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5567 "MatIsTranspose_MPIAIJ", 5568 
MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5569 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5570 "MatMPIAIJSetPreallocation_MPIAIJ", 5571 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5573 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5574 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5576 "MatDiagonalScaleLocal_MPIAIJ", 5577 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5579 "MatConvert_MPIAIJ_MPIAIJPERM", 5580 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5582 "MatConvert_MPIAIJ_MPIAIJCRL", 5583 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5585 "MatConvert_MPIAIJ_MPISBAIJ", 5586 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5588 "MatMatMult_MPIDense_MPIAIJ", 5589 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5591 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5592 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5594 "MatMatMultNumeric_MPIDense_MPIAIJ", 5595 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5597 PetscFunctionReturn(0); 5598 } 5599 EXTERN_C_END 5600 5601 #undef __FUNCT__ 5602 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5603 /*@ 5604 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5605 and "off-diagonal" part of the matrix in CSR format. 5606 5607 Collective on MPI_Comm 5608 5609 Input Parameters: 5610 + comm - MPI communicator 5611 . m - number of local rows (Cannot be PETSC_DECIDE) 5612 . n - This value should be the same as the local size used in creating the 5613 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5614 calculated if N is given) For square matrices n is almost always m. 5615 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5616 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5617 . i - row indices for "diagonal" portion of matrix 5618 . j - column indices 5619 . a - matrix values 5620 . oi - row indices for "off-diagonal" portion of matrix 5621 . oj - column indices 5622 - oa - matrix values 5623 5624 Output Parameter: 5625 . mat - the matrix 5626 5627 Level: advanced 5628 5629 Notes: 5630 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5631 must free the arrays once the matrix has been destroyed and not before. 5632 5633 The i and j indices are 0 based 5634 5635 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5636 5637 This sets local rows and cannot be used to set off-processor values. 5638 5639 You cannot later use MatSetValues() to change values in this matrix. 
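   Example (a minimal sketch with illustrative sizes and values): run on two MPI processes to build the
   2 by 2 matrix with rows [1 2] and [3 4], one row per process. Each process passes its diagonal entry
   with a local column index and its off-diagonal entry with a global column index, and the arrays must
   remain valid until the matrix is destroyed:

      PetscErrorCode ierr;
      PetscMPIInt    rank;
      PetscInt       i[2]  = {0,1}, j[1], oi[2] = {0,1}, oj[1];
      PetscScalar    a[1], oa[1];
      Mat            A;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      j[0]  = 0;
      a[0]  = (rank == 0) ? 1.0 : 4.0;
      oj[0] = (rank == 0) ? 1 : 0;
      oa[0] = (rank == 0) ? 2.0 : 3.0;
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);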
5640 5641 .keywords: matrix, aij, compressed row, sparse, parallel 5642 5643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5644 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5645 @*/ 5646 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5647 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5648 { 5649 PetscErrorCode ierr; 5650 Mat_MPIAIJ *maij; 5651 5652 PetscFunctionBegin; 5653 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5654 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5655 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5656 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5657 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5658 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5659 maij = (Mat_MPIAIJ*) (*mat)->data; 5660 maij->donotstash = PETSC_TRUE; 5661 (*mat)->preallocated = PETSC_TRUE; 5662 5663 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5664 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5665 5666 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5667 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5668 5669 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5670 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5671 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5672 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5673 5674 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5675 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5676 PetscFunctionReturn(0); 5677 } 5678 5679 /* 5680 Special version for direct calls from Fortran 5681 */ 5682 #include <petsc-private/fortranimpl.h> 5683 5684 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5685 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5686 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5687 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5688 #endif 5689 5690 /* Change these macros so can be used in void function */ 5691 #undef CHKERRQ 5692 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5693 #undef SETERRQ2 5694 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5695 #undef SETERRQ3 5696 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5697 #undef SETERRQ 5698 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5699 5700 EXTERN_C_BEGIN 5701 #undef __FUNCT__ 5702 #define __FUNCT__ "matsetvaluesmpiaij_" 5703 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5704 { 5705 Mat mat = *mmat; 5706 PetscInt m = *mm, n = *mn; 5707 InsertMode addv = *maddv; 5708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5709 PetscScalar value; 5710 PetscErrorCode ierr; 5711 5712 MatCheckPreallocated(mat,1); 5713 if (mat->insertmode == NOT_SET_VALUES) { 5714 mat->insertmode = addv; 5715 } 5716 #if defined(PETSC_USE_DEBUG) 5717 else if (mat->insertmode != addv) { 5718 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5719 } 5720 #endif 5721 { 5722 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5723 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5724 PetscBool roworiented = aij->roworiented; 5725 5726 /* Some Variables required in the macro */ 5727 Mat A = aij->A; 5728 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5729 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5730 MatScalar *aa = a->a; 5731 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5732 Mat B = aij->B; 5733 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5734 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5735 MatScalar *ba = b->a; 5736 5737 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5738 PetscInt nonew = a->nonew; 5739 MatScalar *ap1,*ap2; 5740 5741 PetscFunctionBegin; 5742 for (i=0; i<m; i++) { 5743 if (im[i] < 0) continue; 5744 #if defined(PETSC_USE_DEBUG) 5745 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5746 #endif 5747 if (im[i] >= rstart && im[i] < rend) { 5748 row = im[i] - rstart; 5749 lastcol1 = -1; 5750 rp1 = aj + ai[row]; 5751 ap1 = aa + ai[row]; 5752 rmax1 = aimax[row]; 5753 nrow1 = ailen[row]; 5754 low1 = 0; 5755 high1 = nrow1; 5756 lastcol2 = -1; 5757 rp2 = bj + bi[row]; 5758 ap2 = ba + bi[row]; 5759 rmax2 = bimax[row]; 5760 nrow2 = bilen[row]; 5761 low2 = 0; 5762 high2 = nrow2; 5763 5764 for (j=0; j<n; j++) { 5765 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5766 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5767 if (in[j] >= cstart && in[j] < cend){ 5768 col = in[j] - cstart; 5769 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5770 } else if (in[j] < 0) continue; 5771 #if defined(PETSC_USE_DEBUG) 5772 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5773 #endif 5774 else { 5775 if (mat->was_assembled) { 5776 if (!aij->colmap) { 5777 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5778 } 5779 #if defined (PETSC_USE_CTABLE) 5780 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5781 col--; 5782 #else 5783 col = aij->colmap[in[j]] - 1; 5784 #endif 5785 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5786 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5787 col = in[j]; 5788 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5789 B = aij->B; 5790 b = (Mat_SeqAIJ*)B->data; 5791 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5792 rp2 = bj + bi[row]; 5793 ap2 = ba + bi[row]; 5794 rmax2 = bimax[row]; 5795 nrow2 = bilen[row]; 5796 low2 = 0; 5797 high2 = nrow2; 5798 bm = aij->B->rmap->n; 5799 ba = b->a; 5800 } 5801 } else col = in[j]; 5802 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5803 } 5804 } 5805 } else { 5806 if (!aij->donotstash) { 5807 if (roworiented) { 5808 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5809 } else { 5810 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5811 } 5812 } 5813 } 5814 }} 5815 PetscFunctionReturnVoid(); 5816 } 5817 EXTERN_C_END 5818 5819
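/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ() (illustrative only; Aseq stands for an assembled
   SeqAIJ matrix, with the same dimensions on every process, holding this process's contribution):

      Mat Adist;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
      ... update the numerical values of Aseq without changing its nonzero pattern ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
      ierr = MatDestroy(&Adist);CHKERRQ(ierr);
*/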