1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.42 1996/04/30 19:19:34 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 220 { 221 Mat A = (Mat) obj; 222 ViewerType vtype; 223 int ierr; 224 225 if (!viewer) { 226 viewer = STDOUT_VIEWER_SELF; 227 } 228 229 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 230 if (vtype == MATLAB_VIEWER) { 231 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 232 } 233 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 234 return MatView_SeqBAIJ_ASCII(A,viewer); 235 } 236 else if (vtype == BINARY_FILE_VIEWER) { 237 return MatView_SeqBAIJ_Binary(A,viewer); 238 } 239 else if (vtype == DRAW_VIEWER) { 240 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 241 } 242 return 0; 243 } 244 245 #define CHUNKSIZE 10 246 247 /* This version has row oriented v */ 248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 251 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 252 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 253 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 254 int ridx,cidx,bs2=a->bs2; 255 Scalar *ap,value,*aa=a->a,*bap; 256 257 for ( k=0; k<m; k++ ) { /* loop over added rows */ 258 row = im[k]; brow = row/bs; 259 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 260 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 261 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 262 rmax = imax[brow]; nrow = ailen[brow]; 263 low = 0; 264 for ( l=0; l<n; l++ ) { /* loop over added columns */ 265 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 266 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 267 col = in[l]; bcol = col/bs; 268 ridx = row % bs; cidx = col % bs; 269 if (roworiented) { 270 value = *v++; 271 } 272 else { 273 value = v[k + l*m]; 274 } 275 if (!sorted) low = 0; high = nrow; 276 while (high-low > 5) { 277 t = (low+high)/2; 278 if (rp[t] > bcol) high = t; 279 else low = t; 280 } 281 for ( i=low; i<high; i++ ) { 282 if (rp[i] > bcol) break; 283 if (rp[i] == bcol) { 284 bap = ap + bs2*i + bs*cidx + ridx; 285 if (is == ADD_VALUES) *bap += value; 286 else *bap = value; 287 goto noinsert; 288 } 289 } 290 if (nonew) goto noinsert; 291 if (nrow >= rmax) { 292 /* there is no extra room in row, therefore enlarge */ 293 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 294 Scalar *new_a; 295 296 /* malloc new storage space */ 297 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 298 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 299 new_j = (int *) (new_a + bs2*new_nz); 300 new_i = new_j + new_nz; 301 302 /* copy over old data into new slots */ 303 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 304 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 305 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 306 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 307 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 308 len*sizeof(int)); 309 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 310 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 311 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 312 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 313 /* free up old matrix storage */ 314 PetscFree(a->a); 315 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 316 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 317 a->singlemalloc = 1; 318 319 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 320 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 321 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 322 a->maxnz += CHUNKSIZE; 323 a->reallocs++; 324 a->nz++; 325 } 326 N = nrow++ - 1; 327 /* shift up all the later entries in this row */ 328 for ( ii=N; ii>=i; ii-- ) { 329 rp[ii+1] = rp[ii]; 330 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 331 } 332 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 333 rp[i] = bcol; 334 ap[bs2*i + bs*cidx + ridx] = value; 335 noinsert:; 336 low = i; 337 } 338 ailen[brow] = nrow; 339 } 340 return 0; 341 } 342 343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 344 { 345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 346 *m = a->m; *n = a->n; 347 return 0; 348 } 349 350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 351 { 352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 353 *m = 0; *n = a->m; 354 return 0; 355 } 356 357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 361 Scalar *aa,*v_i,*aa_i; 362 363 bs = a->bs; 364 ai = a->i; 365 aj = a->j; 366 aa = a->a; 367 bs2 = a->bs2; 368 369 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 370 371 bn = row/bs; /* Block number */ 372 bp = row % bs; /* Block Position */ 373 M = ai[bn+1] - ai[bn]; 374 *nz = bs*M; 375 376 if (v) { 377 *v = 0; 378 if (*nz) { 379 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 380 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 381 v_i = *v + i*bs; 382 aa_i = aa + bs2*(ai[bn] + i); 383 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 384 } 385 } 386 } 387 388 if (idx) { 389 *idx = 0; 390 if (*nz) { 391 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 392 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 393 idx_i = *idx + i*bs; 394 itmp = bs*aj[ai[bn] + i]; 395 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 396 } 397 } 398 } 399 return 0; 400 } 401 402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 403 { 404 if (idx) {if (*idx) PetscFree(*idx);} 405 if (v) {if (*v) PetscFree(*v);} 406 return 0; 407 } 408 409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 410 { 411 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 412 Mat C; 413 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 414 int *rows,*cols,bs2=a->bs2; 415 Scalar *array=a->a; 416 417 if (B==PETSC_NULL && mbs!=nbs) 418 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 419 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 420 PetscMemzero(col,(1+nbs)*sizeof(int)); 421 422 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 423 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 424 PetscFree(col); 425 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 426 cols = rows + bs; 427 for ( i=0; i<mbs; i++ ) { 428 cols[0] = i*bs; 429 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 430 len = ai[i+1] - ai[i]; 431 for ( j=0; j<len; j++ ) { 432 rows[0] = (*aj++)*bs; 433 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 434 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 435 array += bs2; 436 } 437 } 438 PetscFree(rows); 439 440 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 441 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442 443 if (B != PETSC_NULL) { 444 *B = C; 445 } else { 446 /* This isn't really an in-place transpose */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 449 if (a->diag) PetscFree(a->diag); 450 if (a->ilen) PetscFree(a->ilen); 451 if (a->imax) PetscFree(a->imax); 452 if (a->solve_work) PetscFree(a->solve_work); 453 PetscFree(a); 454 PetscMemcpy(A,C,sizeof(struct _Mat)); 455 PetscHeaderDestroy(C); 456 } 457 return 0; 458 } 459 460 461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 462 { 463 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 464 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 465 int m = a->m,*ip, N, *ailen = a->ilen; 466 int mbs = a->mbs, bs2 = a->bs2; 467 Scalar *aa = a->a, *ap; 468 469 if (mode == FLUSH_ASSEMBLY) return 0; 470 471 for ( i=1; i<mbs; i++ ) { 472 /* move each row back by the amount of empty slots (fshift) before it*/ 473 fshift += imax[i-1] - ailen[i-1]; 474 if (fshift) { 475 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 476 N = ailen[i]; 477 for ( j=0; j<N; j++ ) { 478 ip[j-fshift] = ip[j]; 479 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 480 } 481 } 482 ai[i] = ai[i-1] + ailen[i-1]; 483 } 484 if (mbs) { 485 fshift += imax[mbs-1] - ailen[mbs-1]; 486 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 487 } 488 /* reset ilen and imax for each row */ 489 for ( i=0; i<mbs; i++ ) { 490 ailen[i] = imax[i] = ai[i+1] - ai[i]; 491 } 492 a->nz = ai[mbs]; 493 494 /* diagonals may have moved, so kill the diagonal pointers */ 495 if (fshift && a->diag) { 496 PetscFree(a->diag); 497 PLogObjectMemory(A,-(m+1)*sizeof(int)); 498 a->diag = 0; 499 } 500 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space(blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 501 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 502 a->reallocs); 503 return 0; 504 } 505 506 static int MatZeroEntries_SeqBAIJ(Mat A) 507 { 508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 509 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 510 return 0; 511 } 512 513 int MatDestroy_SeqBAIJ(PetscObject obj) 514 { 515 Mat A = (Mat) obj; 516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 517 518 #if defined(PETSC_LOG) 519 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 520 #endif 521 PetscFree(a->a); 522 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 523 if (a->diag) PetscFree(a->diag); 524 if (a->ilen) PetscFree(a->ilen); 525 if (a->imax) PetscFree(a->imax); 526 if (a->solve_work) PetscFree(a->solve_work); 527 if (a->mult_work) PetscFree(a->mult_work); 528 PetscFree(a); 529 PLogObjectDestroy(A); 530 PetscHeaderDestroy(A); 531 return 0; 532 } 533 534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 535 { 536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 537 if (op == ROW_ORIENTED) a->roworiented = 1; 538 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 539 else if (op == COLUMNS_SORTED) a->sorted = 1; 540 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 541 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 542 else if (op == ROWS_SORTED || 543 op == SYMMETRIC_MATRIX || 544 op == STRUCTURALLY_SYMMETRIC_MATRIX || 545 op == YES_NEW_DIAGONALS) 546 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 547 else if (op == NO_NEW_DIAGONALS) 548 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 549 else 550 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 551 return 0; 552 } 553 554 555 /* -------------------------------------------------------*/ 556 /* Should check that shapes of vectors and matrices match */ 557 /* -------------------------------------------------------*/ 558 #include "pinclude/plapack.h" 559 560 static int MatMult_SeqBAIJ(Mat A,Vec xx,Vec zz) 561 { 562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 563 Scalar *xg,*zg; 564 register Scalar *x,*z,*v,sum,*xb,sum1,sum2,sum3,sum4,sum5; 565 register Scalar x1,x2,x3,x4,x5; 566 int mbs=a->mbs,i,*idx,*ii; 567 int bs=a->bs,j,n,bs2=a->bs2,ierr; 568 569 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 570 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 571 572 idx = a->j; 573 v = a->a; 574 ii = a->i; 575 576 switch (bs) { 577 case 1: 578 for ( i=0; i<mbs; i++ ) { 579 n = ii[1] - ii[0]; ii++; 580 sum = 0.0; 581 while (n--) sum += *v++ * x[*idx++]; 582 z[i] = sum; 583 } 584 break; 585 case 2: 586 for ( i=0; i<mbs; i++ ) { 587 n = ii[1] - ii[0]; ii++; 588 sum1 = 0.0; sum2 = 0.0; 589 for ( j=0; j<n; j++ ) { 590 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 591 sum1 += v[0]*x1 + v[2]*x2; 592 sum2 += v[1]*x1 + v[3]*x2; 593 v += 4; 594 } 595 z[0] = sum1; z[1] = sum2; 596 z += 2; 597 } 598 break; 599 case 3: 600 for ( i=0; i<mbs; i++ ) { 601 n = ii[1] - ii[0]; ii++; 602 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 603 for ( j=0; j<n; j++ ) { 604 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 605 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 606 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 607 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 608 v += 9; 609 } 610 z[0] = sum1; z[1] = sum2; z[2] = sum3; 611 z += 3; 612 } 613 break; 614 case 4: 615 for ( i=0; i<mbs; i++ ) { 616 n = ii[1] - ii[0]; ii++; 617 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 618 for ( j=0; j<n; j++ ) { 619 xb = x + 4*(*idx++); 620 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 621 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 622 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 623 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 624 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 625 v += 16; 626 } 627 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 628 z += 4; 629 } 630 break; 631 case 5: 632 for ( i=0; i<mbs; i++ ) { 633 n = ii[1] - ii[0]; ii++; 634 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 635 for ( j=0; j<n; j++ ) { 636 xb = x + 5*(*idx++); 637 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 638 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 639 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 640 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 641 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 642 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 643 v += 25; 644 } 645 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 646 z += 5; 647 } 648 break; 649 /* block sizes larger then 5 by 5 are handled by BLAS */ 650 default: { 651 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 652 if (!a->mult_work) { 653 k = PetscMax(a->m,a->n); 654 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 655 CHKPTRQ(a->mult_work); 656 } 657 work = a->mult_work; 658 for ( i=0; i<mbs; i++ ) { 659 n = ii[1] - ii[0]; ii++; 660 ncols = n*bs; 661 workt = work; 662 for ( j=0; j<n; j++ ) { 663 xb = x + bs*(*idx++); 664 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 665 workt += bs; 666 } 667 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 668 v += n*bs2; 669 z += bs; 670 } 671 } 672 } 673 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 674 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 675 PLogFlops(2*a->nz*bs2 - a->m); 676 return 0; 677 } 678 679 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 680 { 681 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 682 Scalar *xg,*zg,*zb; 683 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 684 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval; 685 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 686 687 688 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 689 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 690 691 idx = a->j; 692 v = a->a; 693 ii = a->i; 694 695 switch (bs) { 696 case 1: 697 for ( i=0; i<mbs; i++ ) { 698 n = ii[1] - ii[0]; ii++; 699 xb = x + i; x1 = xb[0]; 700 ib = idx + ai[i]; 701 for ( j=0; j<n; j++ ) { 702 z[ib[j]] = *v++ * x1; 703 } 704 } 705 break; 706 case 2: 707 for ( i=0; i<mbs; i++ ) { 708 n = ii[1] - ii[0]; ii++; 709 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 710 ib = idx + ai[i]; 711 for ( j=0; j<n; j++ ) { 712 rval = ib[j]*2; 713 z[rval++] = v[0]*x1 + v[1]*x2; 714 z[rval++] = v[2]*x1 + v[3]*x2; 715 v += 4; 716 } 717 } 718 break; 719 case 3: 720 for ( i=0; i<mbs; i++ ) { 721 n = ii[1] - ii[0]; ii++; 722 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 723 ib = idx + ai[i]; 724 for ( j=0; j<n; j++ ) { 725 rval = ib[j]*3; 726 z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3; 727 z[rval++] = v[3]*x1 + v[4]*x2 + v[5]*x3; 728 z[rval++] = v[6]*x1 + v[7]*x2 + v[8]*x3; 729 v += 9; 730 } 731 } 732 break; 733 case 4: 734 for ( i=0; i<mbs; i++ ) { 735 n = ii[1] - ii[0]; ii++; 736 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 737 ib = idx + ai[i]; 738 for ( j=0; j<n; j++ ) { 739 rval = ib[j]*4; 740 z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 741 z[rval++] = v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 742 z[rval++] = v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 743 z[rval++] = v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 744 v += 16; 745 } 746 } 747 break; 748 case 5: 749 for ( i=0; i<mbs; i++ ) { 750 n = ii[1] - ii[0]; ii++; 751 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 752 x4 = xb[3]; x5 = xb[4]; 753 ib = idx + ai[i]; 754 for ( j=0; j<n; j++ ) { 755 rval = ib[j]*5; 756 z[rval++] = v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 757 z[rval++] = v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 758 z[rval++] = v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 759 z[rval++] = v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 760 z[rval++] = v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 761 v += 25; 762 } 763 } 764 break; 765 /* block sizes larger then 5 by 5 are handled by BLAS */ 766 default: { 767 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 768 if (!a->mult_work) { 769 k = PetscMax(a->m,a->n); 770 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 771 CHKPTRQ(a->mult_work); 772 } 773 work = a->mult_work; 774 for ( i=0; i<mbs; i++ ) { 775 n = ii[1] - ii[0]; ii++; 776 ncols = n*bs; 777 PetscMemzero(work,ncols*sizeof(Scalar)); 778 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 779 v += n*bs2; 780 x += bs; 781 workt = work; 782 for ( j=0; j<n; j++ ) { 783 zb = z + bs*(*idx++); 784 for ( k=0; k<bs; k++ ) zb[k] = workt[k] ; 785 workt += bs; 786 } 787 } 788 } 789 } 790 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 791 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 792 return 0; 793 } 794 795 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 796 { 797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 798 if (nz) *nz = a->bs2*a->nz; 799 if (nza) *nza = a->maxnz; 800 if (mem) *mem = (int)A->mem; 801 return 0; 802 } 803 804 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 805 { 806 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 807 808 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 809 810 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 811 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 812 (a->nz != b->nz)) { 813 *flg = PETSC_FALSE; return 0; 814 } 815 816 /* if the a->i are the same */ 817 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 818 *flg = PETSC_FALSE; return 0; 819 } 820 821 /* if a->j are the same */ 822 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 823 *flg = PETSC_FALSE; return 0; 824 } 825 826 /* if a->a are the same */ 827 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 828 *flg = PETSC_FALSE; return 0; 829 } 830 *flg = PETSC_TRUE; 831 return 0; 832 833 } 834 835 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 836 { 837 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 838 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 839 Scalar *x, zero = 0.0,*aa,*aa_j; 840 841 bs = a->bs; 842 aa = a->a; 843 ai = a->i; 844 aj = a->j; 845 ambs = a->mbs; 846 bs2 = a->bs2; 847 848 VecSet(&zero,v); 849 VecGetArray(v,&x); VecGetLocalSize(v,&n); 850 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 851 for ( i=0; i<ambs; i++ ) { 852 for ( j=ai[i]; j<ai[i+1]; j++ ) { 853 if (aj[j] == i) { 854 row = i*bs; 855 aa_j = aa+j*bs2; 856 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 857 break; 858 } 859 } 860 } 861 return 0; 862 } 863 864 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 865 { 866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 867 Scalar *l,*r,x,*v,*aa,*li,*ri; 868 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 869 870 ai = a->i; 871 aj = a->j; 872 aa = a->a; 873 m = a->m; 874 n = a->n; 875 bs = a->bs; 876 mbs = a->mbs; 877 bs2 = a->bs2; 878 if (ll) { 879 VecGetArray(ll,&l); VecGetSize(ll,&lm); 880 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 881 for ( i=0; i<mbs; i++ ) { /* for each block row */ 882 M = ai[i+1] - ai[i]; 883 li = l + i*bs; 884 v = aa + bs2*ai[i]; 885 for ( j=0; j<M; j++ ) { /* for each block */ 886 for ( k=0; k<bs2; k++ ) { 887 (*v++) *= li[k%bs]; 888 } 889 } 890 } 891 } 892 893 if (rr) { 894 VecGetArray(rr,&r); VecGetSize(rr,&rn); 895 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 896 for ( i=0; i<mbs; i++ ) { /* for each block row */ 897 M = ai[i+1] - ai[i]; 898 v = aa + bs2*ai[i]; 899 for ( j=0; j<M; j++ ) { /* for each block */ 900 ri = r + bs*aj[ai[i]+j]; 901 for ( k=0; k<bs; k++ ) { 902 x = ri[k]; 903 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 904 } 905 } 906 } 907 } 908 return 0; 909 } 910 911 912 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 913 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 914 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 915 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 916 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 917 918 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 919 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 920 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 921 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 922 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 923 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 924 925 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 926 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 927 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 928 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 929 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 930 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 931 932 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 933 { 934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 935 Scalar *v = a->a; 936 double sum = 0.0; 937 int i,nz=a->nz,bs2=a->bs2; 938 939 if (type == NORM_FROBENIUS) { 940 for (i=0; i< bs2*nz; i++ ) { 941 #if defined(PETSC_COMPLEX) 942 sum += real(conj(*v)*(*v)); v++; 943 #else 944 sum += (*v)*(*v); v++; 945 #endif 946 } 947 *norm = sqrt(sum); 948 } 949 else { 950 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 951 } 952 return 0; 953 } 954 955 /* 956 note: This can only work for identity for row and col. It would 957 be good to check this and otherwise generate an error. 958 */ 959 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 960 { 961 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 962 Mat outA; 963 int ierr; 964 965 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 966 967 outA = inA; 968 inA->factor = FACTOR_LU; 969 a->row = row; 970 a->col = col; 971 972 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 973 974 if (!a->diag) { 975 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 976 } 977 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 978 return 0; 979 } 980 981 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 982 { 983 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 984 int one = 1, totalnz = a->bs2*a->nz; 985 BLscal_( &totalnz, alpha, a->a, &one ); 986 PLogFlops(totalnz); 987 return 0; 988 } 989 990 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 991 { 992 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 993 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 994 int *ai = a->i, *ailen = a->ilen; 995 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 996 Scalar *ap, *aa = a->a, zero = 0.0; 997 998 for ( k=0; k<m; k++ ) { /* loop over rows */ 999 row = im[k]; brow = row/bs; 1000 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1001 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1002 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1003 nrow = ailen[brow]; 1004 for ( l=0; l<n; l++ ) { /* loop over columns */ 1005 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1006 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1007 col = in[l] ; 1008 bcol = col/bs; 1009 cidx = col%bs; 1010 ridx = row%bs; 1011 high = nrow; 1012 low = 0; /* assume unsorted */ 1013 while (high-low > 5) { 1014 t = (low+high)/2; 1015 if (rp[t] > bcol) high = t; 1016 else low = t; 1017 } 1018 for ( i=low; i<high; i++ ) { 1019 if (rp[i] > bcol) break; 1020 if (rp[i] == bcol) { 1021 *v++ = ap[bs2*i+bs*cidx+ridx]; 1022 goto finished; 1023 } 1024 } 1025 *v++ = zero; 1026 finished:; 1027 } 1028 } 1029 return 0; 1030 } 1031 1032 /* -------------------------------------------------------------------*/ 1033 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1034 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1035 MatMult_SeqBAIJ,0, 1036 MatMultTrans_SeqBAIJ,0, 1037 MatSolve_SeqBAIJ_N,0, 1038 0,0, 1039 MatLUFactor_SeqBAIJ,0, 1040 0, 1041 MatTranspose_SeqBAIJ, 1042 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1043 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1044 0,MatAssemblyEnd_SeqBAIJ, 1045 0, 1046 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1047 MatGetReordering_SeqBAIJ, 1048 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1049 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1050 MatILUFactorSymbolic_SeqBAIJ,0, 1051 0,0,/* MatConvert_SeqBAIJ */ 0, 1052 MatGetSubMatrix_SeqBAIJ,0, 1053 MatConvertSameType_SeqBAIJ,0,0, 1054 MatILUFactor_SeqBAIJ,0,0, 1055 0,MatIncreaseOverlap_SeqBAIJ, 1056 MatGetValues_SeqBAIJ,0, 1057 0,MatScale_SeqBAIJ, 1058 0}; 1059 1060 /*@C 1061 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1062 compressed row) format. For good matrix assembly performance the 1063 user should preallocate the matrix storage by setting the parameter nz 1064 (or nzz). By setting these parameters accurately, performance can be 1065 increased by more than a factor of 50. 1066 1067 Input Parameters: 1068 . comm - MPI communicator, set to MPI_COMM_SELF 1069 . bs - size of block 1070 . m - number of rows 1071 . n - number of columns 1072 . nz - number of block nonzeros per block row (same for all rows) 1073 . nzz - number of block nonzeros per block row or PETSC_NULL 1074 (possibly different for each row) 1075 1076 Output Parameter: 1077 . A - the matrix 1078 1079 Notes: 1080 The block AIJ format is fully compatible with standard Fortran 77 1081 storage. That is, the stored row and column indices can begin at 1082 either one (as in Fortran) or zero. See the users' manual for details. 1083 1084 Specify the preallocated storage with either nz or nnz (not both). 1085 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1086 allocation. For additional details, see the users manual chapter on 1087 matrices and the file $(PETSC_DIR)/Performance. 1088 1089 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1090 @*/ 1091 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1092 { 1093 Mat B; 1094 Mat_SeqBAIJ *b; 1095 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1096 1097 if (mbs*bs!=m || nbs*bs!=n) 1098 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1099 1100 *A = 0; 1101 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1102 PLogObjectCreate(B); 1103 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1104 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1105 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1106 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1107 if (!flg) { 1108 switch (bs) { 1109 case 1: 1110 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1111 B->ops.solve = MatSolve_SeqBAIJ_1; 1112 break; 1113 case 2: 1114 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1115 B->ops.solve = MatSolve_SeqBAIJ_2; 1116 break; 1117 case 3: 1118 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1119 B->ops.solve = MatSolve_SeqBAIJ_3; 1120 break; 1121 case 4: 1122 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1123 B->ops.solve = MatSolve_SeqBAIJ_4; 1124 break; 1125 case 5: 1126 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1127 B->ops.solve = MatSolve_SeqBAIJ_5; 1128 break; 1129 } 1130 } 1131 B->destroy = MatDestroy_SeqBAIJ; 1132 B->view = MatView_SeqBAIJ; 1133 B->factor = 0; 1134 B->lupivotthreshold = 1.0; 1135 b->row = 0; 1136 b->col = 0; 1137 b->reallocs = 0; 1138 1139 b->m = m; B->m = m; B->M = m; 1140 b->n = n; B->n = n; B->N = n; 1141 b->mbs = mbs; 1142 b->nbs = nbs; 1143 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1144 if (nnz == PETSC_NULL) { 1145 if (nz == PETSC_DEFAULT) nz = 5; 1146 else if (nz <= 0) nz = 1; 1147 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1148 nz = nz*mbs; 1149 } 1150 else { 1151 nz = 0; 1152 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1153 } 1154 1155 /* allocate the matrix space */ 1156 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1157 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1158 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1159 b->j = (int *) (b->a + nz*bs2); 1160 PetscMemzero(b->j,nz*sizeof(int)); 1161 b->i = b->j + nz; 1162 b->singlemalloc = 1; 1163 1164 b->i[0] = 0; 1165 for (i=1; i<mbs+1; i++) { 1166 b->i[i] = b->i[i-1] + b->imax[i-1]; 1167 } 1168 1169 /* b->ilen will count nonzeros in each block row so far. */ 1170 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1171 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1172 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1173 1174 b->bs = bs; 1175 b->bs2 = bs2; 1176 b->mbs = mbs; 1177 b->nz = 0; 1178 b->maxnz = nz; 1179 b->sorted = 0; 1180 b->roworiented = 1; 1181 b->nonew = 0; 1182 b->diag = 0; 1183 b->solve_work = 0; 1184 b->mult_work = 0; 1185 b->spptr = 0; 1186 *A = B; 1187 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1188 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1189 return 0; 1190 } 1191 1192 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1193 { 1194 Mat C; 1195 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1196 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1197 1198 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1199 1200 *B = 0; 1201 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1202 PLogObjectCreate(C); 1203 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1204 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1205 C->destroy = MatDestroy_SeqBAIJ; 1206 C->view = MatView_SeqBAIJ; 1207 C->factor = A->factor; 1208 c->row = 0; 1209 c->col = 0; 1210 C->assembled = PETSC_TRUE; 1211 1212 c->m = C->m = a->m; 1213 c->n = C->n = a->n; 1214 C->M = a->m; 1215 C->N = a->n; 1216 1217 c->bs = a->bs; 1218 c->bs2 = a->bs2; 1219 c->mbs = a->mbs; 1220 c->nbs = a->nbs; 1221 1222 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1223 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1224 for ( i=0; i<mbs; i++ ) { 1225 c->imax[i] = a->imax[i]; 1226 c->ilen[i] = a->ilen[i]; 1227 } 1228 1229 /* allocate the matrix space */ 1230 c->singlemalloc = 1; 1231 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1232 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1233 c->j = (int *) (c->a + nz*bs2); 1234 c->i = c->j + nz; 1235 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1236 if (mbs > 0) { 1237 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1238 if (cpvalues == COPY_VALUES) { 1239 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1240 } 1241 } 1242 1243 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1244 c->sorted = a->sorted; 1245 c->roworiented = a->roworiented; 1246 c->nonew = a->nonew; 1247 1248 if (a->diag) { 1249 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1250 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1251 for ( i=0; i<mbs; i++ ) { 1252 c->diag[i] = a->diag[i]; 1253 } 1254 } 1255 else c->diag = 0; 1256 c->nz = a->nz; 1257 c->maxnz = a->maxnz; 1258 c->solve_work = 0; 1259 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1260 c->mult_work = 0; 1261 *B = C; 1262 return 0; 1263 } 1264 1265 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1266 { 1267 Mat_SeqBAIJ *a; 1268 Mat B; 1269 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1270 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1271 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1272 int *masked, nmask,tmp,bs2,ishift; 1273 Scalar *aa; 1274 MPI_Comm comm = ((PetscObject) viewer)->comm; 1275 1276 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1277 bs2 = bs*bs; 1278 1279 MPI_Comm_size(comm,&size); 1280 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1281 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1282 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1283 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1284 M = header[1]; N = header[2]; nz = header[3]; 1285 1286 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1287 1288 /* 1289 This code adds extra rows to make sure the number of rows is 1290 divisible by the blocksize 1291 */ 1292 mbs = M/bs; 1293 extra_rows = bs - M + bs*(mbs); 1294 if (extra_rows == bs) extra_rows = 0; 1295 else mbs++; 1296 if (extra_rows) { 1297 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1298 } 1299 1300 /* read in row lengths */ 1301 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1302 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1303 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1304 1305 /* read in column indices */ 1306 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1307 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1308 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1309 1310 /* loop over row lengths determining block row lengths */ 1311 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1312 PetscMemzero(browlengths,mbs*sizeof(int)); 1313 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1314 PetscMemzero(mask,mbs*sizeof(int)); 1315 masked = mask + mbs; 1316 rowcount = 0; nzcount = 0; 1317 for ( i=0; i<mbs; i++ ) { 1318 nmask = 0; 1319 for ( j=0; j<bs; j++ ) { 1320 kmax = rowlengths[rowcount]; 1321 for ( k=0; k<kmax; k++ ) { 1322 tmp = jj[nzcount++]/bs; 1323 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1324 } 1325 rowcount++; 1326 } 1327 browlengths[i] += nmask; 1328 /* zero out the mask elements we set */ 1329 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1330 } 1331 1332 /* create our matrix */ 1333 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1334 CHKERRQ(ierr); 1335 B = *A; 1336 a = (Mat_SeqBAIJ *) B->data; 1337 1338 /* set matrix "i" values */ 1339 a->i[0] = 0; 1340 for ( i=1; i<= mbs; i++ ) { 1341 a->i[i] = a->i[i-1] + browlengths[i-1]; 1342 a->ilen[i-1] = browlengths[i-1]; 1343 } 1344 a->nz = 0; 1345 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1346 1347 /* read in nonzero values */ 1348 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1349 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1350 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1351 1352 /* set "a" and "j" values into matrix */ 1353 nzcount = 0; jcount = 0; 1354 for ( i=0; i<mbs; i++ ) { 1355 nzcountb = nzcount; 1356 nmask = 0; 1357 for ( j=0; j<bs; j++ ) { 1358 kmax = rowlengths[i*bs+j]; 1359 for ( k=0; k<kmax; k++ ) { 1360 tmp = jj[nzcount++]/bs; 1361 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1362 } 1363 rowcount++; 1364 } 1365 /* sort the masked values */ 1366 PetscSortInt(nmask,masked); 1367 1368 /* set "j" values into matrix */ 1369 maskcount = 1; 1370 for ( j=0; j<nmask; j++ ) { 1371 a->j[jcount++] = masked[j]; 1372 mask[masked[j]] = maskcount++; 1373 } 1374 /* set "a" values into matrix */ 1375 ishift = bs2*a->i[i]; 1376 for ( j=0; j<bs; j++ ) { 1377 kmax = rowlengths[i*bs+j]; 1378 for ( k=0; k<kmax; k++ ) { 1379 tmp = jj[nzcountb]/bs ; 1380 block = mask[tmp] - 1; 1381 point = jj[nzcountb] - bs*tmp; 1382 idx = ishift + bs2*block + j + bs*point; 1383 a->a[idx] = aa[nzcountb++]; 1384 } 1385 } 1386 /* zero out the mask elements we set */ 1387 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1388 } 1389 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1390 1391 PetscFree(rowlengths); 1392 PetscFree(browlengths); 1393 PetscFree(aa); 1394 PetscFree(jj); 1395 PetscFree(mask); 1396 1397 B->assembled = PETSC_TRUE; 1398 1399 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1400 if (flg) { 1401 Viewer tviewer; 1402 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1403 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1404 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1405 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1406 } 1407 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1408 if (flg) { 1409 Viewer tviewer; 1410 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1411 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1412 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1413 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1414 } 1415 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1416 if (flg) { 1417 Viewer tviewer; 1418 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1419 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1420 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1421 } 1422 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1423 if (flg) { 1424 Viewer tviewer; 1425 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1426 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1427 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1428 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1429 } 1430 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1431 if (flg) { 1432 Viewer tviewer; 1433 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1434 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1435 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1436 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1437 } 1438 return 0; 1439 } 1440 1441 1442 1443