1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.59 1996/07/11 04:04:53 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 220 { 221 Mat A = (Mat) obj; 222 ViewerType vtype; 223 int ierr; 224 225 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 226 if (vtype == MATLAB_VIEWER) { 227 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 228 } 229 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 230 return MatView_SeqBAIJ_ASCII(A,viewer); 231 } 232 else if (vtype == BINARY_FILE_VIEWER) { 233 return MatView_SeqBAIJ_Binary(A,viewer); 234 } 235 else if (vtype == DRAW_VIEWER) { 236 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 237 } 238 return 0; 239 } 240 241 #define CHUNKSIZE 10 242 243 /* This version has row oriented v */ 244 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 245 { 246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 247 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 248 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 249 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 250 int ridx,cidx,bs2=a->bs2; 251 Scalar *ap,value,*aa=a->a,*bap; 252 253 for ( k=0; k<m; k++ ) { /* loop over added rows */ 254 row = im[k]; brow = row/bs; 255 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 256 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 257 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 258 rmax = imax[brow]; nrow = ailen[brow]; 259 low = 0; 260 for ( l=0; l<n; l++ ) { /* loop over added columns */ 261 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 262 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 263 col = in[l]; bcol = col/bs; 264 ridx = row % bs; cidx = col % bs; 265 if (roworiented) { 266 value = *v++; 267 } 268 else { 269 value = v[k + l*m]; 270 } 271 if (!sorted) low = 0; high = nrow; 272 while (high-low > 5) { 273 t = (low+high)/2; 274 if (rp[t] > bcol) high = t; 275 else low = t; 276 } 277 for ( i=low; i<high; i++ ) { 278 if (rp[i] > bcol) break; 279 if (rp[i] == bcol) { 280 bap = ap + bs2*i + bs*cidx + ridx; 281 if (is == ADD_VALUES) *bap += value; 282 else *bap = value; 283 goto noinsert; 284 } 285 } 286 if (nonew) goto noinsert; 287 if (nrow >= rmax) { 288 /* there is no extra room in row, therefore enlarge */ 289 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 290 Scalar *new_a; 291 292 /* malloc new storage space */ 293 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 294 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 295 new_j = (int *) (new_a + bs2*new_nz); 296 new_i = new_j + new_nz; 297 298 /* copy over old data into new slots */ 299 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 300 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 301 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 302 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 303 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 304 len*sizeof(int)); 305 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 306 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 307 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 308 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 309 /* free up old matrix storage */ 310 PetscFree(a->a); 311 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 312 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 313 a->singlemalloc = 1; 314 315 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 316 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 317 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 318 a->maxnz += bs2*CHUNKSIZE; 319 a->reallocs++; 320 a->nz++; 321 } 322 N = nrow++ - 1; 323 /* shift up all the later entries in this row */ 324 for ( ii=N; ii>=i; ii-- ) { 325 rp[ii+1] = rp[ii]; 326 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 327 } 328 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 329 rp[i] = bcol; 330 ap[bs2*i + bs*cidx + ridx] = value; 331 noinsert:; 332 low = i; 333 } 334 ailen[brow] = nrow; 335 } 336 return 0; 337 } 338 339 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 340 { 341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 342 *m = a->m; *n = a->n; 343 return 0; 344 } 345 346 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 347 { 348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 349 *m = 0; *n = a->m; 350 return 0; 351 } 352 353 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 354 { 355 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 356 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 357 Scalar *aa,*v_i,*aa_i; 358 359 bs = a->bs; 360 ai = a->i; 361 aj = a->j; 362 aa = a->a; 363 bs2 = a->bs2; 364 365 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 366 367 bn = row/bs; /* Block number */ 368 bp = row % bs; /* Block Position */ 369 M = ai[bn+1] - ai[bn]; 370 *nz = bs*M; 371 372 if (v) { 373 *v = 0; 374 if (*nz) { 375 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 376 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 377 v_i = *v + i*bs; 378 aa_i = aa + bs2*(ai[bn] + i); 379 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 380 } 381 } 382 } 383 384 if (idx) { 385 *idx = 0; 386 if (*nz) { 387 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 388 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 389 idx_i = *idx + i*bs; 390 itmp = bs*aj[ai[bn] + i]; 391 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 392 } 393 } 394 } 395 return 0; 396 } 397 398 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 399 { 400 if (idx) {if (*idx) PetscFree(*idx);} 401 if (v) {if (*v) PetscFree(*v);} 402 return 0; 403 } 404 405 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 406 { 407 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 408 Mat C; 409 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 410 int *rows,*cols,bs2=a->bs2; 411 Scalar *array=a->a; 412 413 if (B==PETSC_NULL && mbs!=nbs) 414 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 415 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 416 PetscMemzero(col,(1+nbs)*sizeof(int)); 417 418 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 419 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 420 PetscFree(col); 421 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 422 cols = rows + bs; 423 for ( i=0; i<mbs; i++ ) { 424 cols[0] = i*bs; 425 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 426 len = ai[i+1] - ai[i]; 427 for ( j=0; j<len; j++ ) { 428 rows[0] = (*aj++)*bs; 429 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 430 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 431 array += bs2; 432 } 433 } 434 PetscFree(rows); 435 436 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 437 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 438 439 if (B != PETSC_NULL) { 440 *B = C; 441 } else { 442 /* This isn't really an in-place transpose */ 443 PetscFree(a->a); 444 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 445 if (a->diag) PetscFree(a->diag); 446 if (a->ilen) PetscFree(a->ilen); 447 if (a->imax) PetscFree(a->imax); 448 if (a->solve_work) PetscFree(a->solve_work); 449 PetscFree(a); 450 PetscMemcpy(A,C,sizeof(struct _Mat)); 451 PetscHeaderDestroy(C); 452 } 453 return 0; 454 } 455 456 457 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 458 { 459 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 460 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 461 int m = a->m,*ip, N, *ailen = a->ilen; 462 int mbs = a->mbs, bs2 = a->bs2; 463 Scalar *aa = a->a, *ap; 464 465 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 466 467 for ( i=1; i<mbs; i++ ) { 468 /* move each row back by the amount of empty slots (fshift) before it*/ 469 fshift += imax[i-1] - ailen[i-1]; 470 if (fshift) { 471 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 472 N = ailen[i]; 473 for ( j=0; j<N; j++ ) { 474 ip[j-fshift] = ip[j]; 475 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 476 } 477 } 478 ai[i] = ai[i-1] + ailen[i-1]; 479 } 480 if (mbs) { 481 fshift += imax[mbs-1] - ailen[mbs-1]; 482 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 483 } 484 /* reset ilen and imax for each row */ 485 for ( i=0; i<mbs; i++ ) { 486 ailen[i] = imax[i] = ai[i+1] - ai[i]; 487 } 488 a->nz = ai[mbs]; 489 490 /* diagonals may have moved, so kill the diagonal pointers */ 491 if (fshift && a->diag) { 492 PetscFree(a->diag); 493 PLogObjectMemory(A,-(m+1)*sizeof(int)); 494 a->diag = 0; 495 } 496 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 497 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 498 a->reallocs); 499 return 0; 500 } 501 502 static int MatZeroEntries_SeqBAIJ(Mat A) 503 { 504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 505 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 506 return 0; 507 } 508 509 int MatDestroy_SeqBAIJ(PetscObject obj) 510 { 511 Mat A = (Mat) obj; 512 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 513 514 #if defined(PETSC_LOG) 515 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 516 #endif 517 PetscFree(a->a); 518 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 519 if (a->diag) PetscFree(a->diag); 520 if (a->ilen) PetscFree(a->ilen); 521 if (a->imax) PetscFree(a->imax); 522 if (a->solve_work) PetscFree(a->solve_work); 523 if (a->mult_work) PetscFree(a->mult_work); 524 PetscFree(a); 525 PLogObjectDestroy(A); 526 PetscHeaderDestroy(A); 527 return 0; 528 } 529 530 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 531 { 532 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 533 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 534 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 535 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 536 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 537 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 538 else if (op == MAT_ROWS_SORTED || 539 op == MAT_SYMMETRIC || 540 op == MAT_STRUCTURALLY_SYMMETRIC || 541 op == MAT_YES_NEW_DIAGONALS) 542 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 543 else if (op == MAT_NO_NEW_DIAGONALS) 544 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 545 else 546 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 547 return 0; 548 } 549 550 551 /* -------------------------------------------------------*/ 552 /* Should check that shapes of vectors and matrices match */ 553 /* -------------------------------------------------------*/ 554 #include "pinclude/plapack.h" 555 556 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 557 { 558 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 559 Scalar *xg,*zg; 560 register Scalar *x,*z,*v,sum; 561 int mbs=a->mbs,i,*idx,*ii,n,ierr; 562 563 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 564 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 565 566 idx = a->j; 567 v = a->a; 568 ii = a->i; 569 570 for ( i=0; i<mbs; i++ ) { 571 n = ii[1] - ii[0]; ii++; 572 sum = 0.0; 573 while (n--) sum += *v++ * x[*idx++]; 574 z[i] = sum; 575 } 576 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 577 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 578 PLogFlops(2*a->nz - a->m); 579 return 0; 580 } 581 582 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 583 { 584 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 585 Scalar *xg,*zg; 586 register Scalar *x,*z,*v,*xb,sum1,sum2; 587 register Scalar x1,x2; 588 int mbs=a->mbs,i,*idx,*ii; 589 int j,n,ierr; 590 591 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 592 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 593 594 idx = a->j; 595 v = a->a; 596 ii = a->i; 597 598 for ( i=0; i<mbs; i++ ) { 599 n = ii[1] - ii[0]; ii++; 600 sum1 = 0.0; sum2 = 0.0; 601 for ( j=0; j<n; j++ ) { 602 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 603 sum1 += v[0]*x1 + v[2]*x2; 604 sum2 += v[1]*x1 + v[3]*x2; 605 v += 4; 606 } 607 z[0] = sum1; z[1] = sum2; 608 z += 2; 609 } 610 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 611 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 612 PLogFlops(4*a->nz - a->m); 613 return 0; 614 } 615 616 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 617 { 618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 619 Scalar *xg,*zg; 620 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 621 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 622 623 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 624 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 625 626 idx = a->j; 627 v = a->a; 628 ii = a->i; 629 630 for ( i=0; i<mbs; i++ ) { 631 n = ii[1] - ii[0]; ii++; 632 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 633 for ( j=0; j<n; j++ ) { 634 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 635 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 636 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 637 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 638 v += 9; 639 } 640 z[0] = sum1; z[1] = sum2; z[2] = sum3; 641 z += 3; 642 } 643 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 644 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 645 PLogFlops(18*a->nz - a->m); 646 return 0; 647 } 648 649 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 650 { 651 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 652 Scalar *xg,*zg; 653 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 654 register Scalar x1,x2,x3,x4; 655 int mbs=a->mbs,i,*idx,*ii; 656 int j,n,ierr; 657 658 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 659 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 660 661 idx = a->j; 662 v = a->a; 663 ii = a->i; 664 665 for ( i=0; i<mbs; i++ ) { 666 n = ii[1] - ii[0]; ii++; 667 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 668 for ( j=0; j<n; j++ ) { 669 xb = x + 4*(*idx++); 670 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 671 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 672 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 673 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 674 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 675 v += 16; 676 } 677 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 678 z += 4; 679 } 680 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 681 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 682 PLogFlops(32*a->nz - a->m); 683 return 0; 684 } 685 686 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 687 { 688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 689 Scalar *xg,*zg; 690 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 691 register Scalar x1,x2,x3,x4,x5; 692 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 693 694 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 695 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 696 697 idx = a->j; 698 v = a->a; 699 ii = a->i; 700 701 for ( i=0; i<mbs; i++ ) { 702 n = ii[1] - ii[0]; ii++; 703 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 704 for ( j=0; j<n; j++ ) { 705 xb = x + 5*(*idx++); 706 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 707 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 708 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 709 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 710 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 711 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 712 v += 25; 713 } 714 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 715 z += 5; 716 } 717 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 718 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 719 PLogFlops(50*a->nz - a->m); 720 return 0; 721 } 722 723 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 724 { 725 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 726 Scalar *xg,*zg; 727 register Scalar *x,*z,*v,*xb; 728 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 729 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 730 731 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 732 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 733 734 idx = a->j; 735 v = a->a; 736 ii = a->i; 737 738 739 if (!a->mult_work) { 740 k = PetscMax(a->m,a->n); 741 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 742 } 743 work = a->mult_work; 744 for ( i=0; i<mbs; i++ ) { 745 n = ii[1] - ii[0]; ii++; 746 ncols = n*bs; 747 workt = work; 748 for ( j=0; j<n; j++ ) { 749 xb = x + bs*(*idx++); 750 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 751 workt += bs; 752 } 753 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 754 v += n*bs2; 755 z += bs; 756 } 757 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 758 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 759 PLogFlops(2*a->nz*bs2 - a->m); 760 return 0; 761 } 762 763 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 764 { 765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 766 Scalar *xg,*yg,*zg; 767 register Scalar *x,*y,*z,*v,sum; 768 int mbs=a->mbs,i,*idx,*ii,n,ierr; 769 770 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 771 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 772 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 773 774 idx = a->j; 775 v = a->a; 776 ii = a->i; 777 778 for ( i=0; i<mbs; i++ ) { 779 n = ii[1] - ii[0]; ii++; 780 sum = y[i]; 781 while (n--) sum += *v++ * x[*idx++]; 782 z[i] = sum; 783 } 784 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 785 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 786 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 787 PLogFlops(2*a->nz); 788 return 0; 789 } 790 791 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 792 { 793 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 794 Scalar *xg,*yg,*zg; 795 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 796 register Scalar x1,x2; 797 int mbs=a->mbs,i,*idx,*ii; 798 int j,n,ierr; 799 800 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 801 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 802 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 803 804 idx = a->j; 805 v = a->a; 806 ii = a->i; 807 808 for ( i=0; i<mbs; i++ ) { 809 n = ii[1] - ii[0]; ii++; 810 sum1 = y[0]; sum2 = y[1]; 811 for ( j=0; j<n; j++ ) { 812 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 813 sum1 += v[0]*x1 + v[2]*x2; 814 sum2 += v[1]*x1 + v[3]*x2; 815 v += 4; 816 } 817 z[0] = sum1; z[1] = sum2; 818 z += 2; y += 2; 819 } 820 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 821 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 822 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 823 PLogFlops(4*a->nz); 824 return 0; 825 } 826 827 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 830 Scalar *xg,*yg,*zg; 831 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 832 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 833 834 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 835 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 836 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 837 838 idx = a->j; 839 v = a->a; 840 ii = a->i; 841 842 for ( i=0; i<mbs; i++ ) { 843 n = ii[1] - ii[0]; ii++; 844 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 845 for ( j=0; j<n; j++ ) { 846 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 847 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 848 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 849 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 850 v += 9; 851 } 852 z[0] = sum1; z[1] = sum2; z[2] = sum3; 853 z += 3; y += 3; 854 } 855 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 856 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 857 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 858 PLogFlops(18*a->nz); 859 return 0; 860 } 861 862 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 863 { 864 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 865 Scalar *xg,*yg,*zg; 866 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 867 register Scalar x1,x2,x3,x4; 868 int mbs=a->mbs,i,*idx,*ii; 869 int j,n,ierr; 870 871 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 872 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 873 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 874 875 idx = a->j; 876 v = a->a; 877 ii = a->i; 878 879 for ( i=0; i<mbs; i++ ) { 880 n = ii[1] - ii[0]; ii++; 881 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 882 for ( j=0; j<n; j++ ) { 883 xb = x + 4*(*idx++); 884 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 885 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 886 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 887 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 888 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 889 v += 16; 890 } 891 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 892 z += 4; y += 4; 893 } 894 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 895 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 896 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 897 PLogFlops(32*a->nz); 898 return 0; 899 } 900 901 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 902 { 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 904 Scalar *xg,*yg,*zg; 905 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 906 register Scalar x1,x2,x3,x4,x5; 907 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 908 909 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 910 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 911 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 912 913 idx = a->j; 914 v = a->a; 915 ii = a->i; 916 917 for ( i=0; i<mbs; i++ ) { 918 n = ii[1] - ii[0]; ii++; 919 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 920 for ( j=0; j<n; j++ ) { 921 xb = x + 5*(*idx++); 922 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 923 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 924 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 925 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 926 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 927 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 928 v += 25; 929 } 930 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 931 z += 5; y += 5; 932 } 933 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 934 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 935 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 936 PLogFlops(50*a->nz); 937 return 0; 938 } 939 940 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 941 { 942 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 943 Scalar *xg,*zg; 944 register Scalar *x,*z,*v,*xb; 945 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 946 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 947 948 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 949 950 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 951 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 952 953 idx = a->j; 954 v = a->a; 955 ii = a->i; 956 957 958 if (!a->mult_work) { 959 k = PetscMax(a->m,a->n); 960 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 961 } 962 work = a->mult_work; 963 for ( i=0; i<mbs; i++ ) { 964 n = ii[1] - ii[0]; ii++; 965 ncols = n*bs; 966 workt = work; 967 for ( j=0; j<n; j++ ) { 968 xb = x + bs*(*idx++); 969 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 970 workt += bs; 971 } 972 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 973 v += n*bs2; 974 z += bs; 975 } 976 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 977 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 978 PLogFlops(2*a->nz*bs2 ); 979 return 0; 980 } 981 982 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 983 { 984 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 985 Scalar *xg,*zg,*zb; 986 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 987 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 988 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 989 990 991 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 992 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 993 PetscMemzero(z,N*sizeof(Scalar)); 994 995 idx = a->j; 996 v = a->a; 997 ii = a->i; 998 999 switch (bs) { 1000 case 1: 1001 for ( i=0; i<mbs; i++ ) { 1002 n = ii[1] - ii[0]; ii++; 1003 xb = x + i; x1 = xb[0]; 1004 ib = idx + ai[i]; 1005 for ( j=0; j<n; j++ ) { 1006 rval = ib[j]; 1007 z[rval] += *v++ * x1; 1008 } 1009 } 1010 break; 1011 case 2: 1012 for ( i=0; i<mbs; i++ ) { 1013 n = ii[1] - ii[0]; ii++; 1014 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1015 ib = idx + ai[i]; 1016 for ( j=0; j<n; j++ ) { 1017 rval = ib[j]*2; 1018 z[rval++] += v[0]*x1 + v[1]*x2; 1019 z[rval++] += v[2]*x1 + v[3]*x2; 1020 v += 4; 1021 } 1022 } 1023 break; 1024 case 3: 1025 for ( i=0; i<mbs; i++ ) { 1026 n = ii[1] - ii[0]; ii++; 1027 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1028 ib = idx + ai[i]; 1029 for ( j=0; j<n; j++ ) { 1030 rval = ib[j]*3; 1031 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1032 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1033 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1034 v += 9; 1035 } 1036 } 1037 break; 1038 case 4: 1039 for ( i=0; i<mbs; i++ ) { 1040 n = ii[1] - ii[0]; ii++; 1041 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1042 ib = idx + ai[i]; 1043 for ( j=0; j<n; j++ ) { 1044 rval = ib[j]*4; 1045 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1046 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1047 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1048 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1049 v += 16; 1050 } 1051 } 1052 break; 1053 case 5: 1054 for ( i=0; i<mbs; i++ ) { 1055 n = ii[1] - ii[0]; ii++; 1056 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1057 x4 = xb[3]; x5 = xb[4]; 1058 ib = idx + ai[i]; 1059 for ( j=0; j<n; j++ ) { 1060 rval = ib[j]*5; 1061 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1062 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1063 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1064 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1065 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1066 v += 25; 1067 } 1068 } 1069 break; 1070 /* block sizes larger then 5 by 5 are handled by BLAS */ 1071 default: { 1072 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1073 if (!a->mult_work) { 1074 k = PetscMax(a->m,a->n); 1075 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1076 CHKPTRQ(a->mult_work); 1077 } 1078 work = a->mult_work; 1079 for ( i=0; i<mbs; i++ ) { 1080 n = ii[1] - ii[0]; ii++; 1081 ncols = n*bs; 1082 PetscMemzero(work,ncols*sizeof(Scalar)); 1083 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1084 v += n*bs2; 1085 x += bs; 1086 workt = work; 1087 for ( j=0; j<n; j++ ) { 1088 zb = z + bs*(*idx++); 1089 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1090 workt += bs; 1091 } 1092 } 1093 } 1094 } 1095 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1096 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1097 PLogFlops(2*a->nz*a->bs2 - a->n); 1098 return 0; 1099 } 1100 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1101 { 1102 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1103 Scalar *xg,*zg,*zb; 1104 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1105 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1106 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1107 1108 1109 1110 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1111 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1112 1113 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1114 else PetscMemzero(z,N*sizeof(Scalar)); 1115 1116 1117 idx = a->j; 1118 v = a->a; 1119 ii = a->i; 1120 1121 switch (bs) { 1122 case 1: 1123 for ( i=0; i<mbs; i++ ) { 1124 n = ii[1] - ii[0]; ii++; 1125 xb = x + i; x1 = xb[0]; 1126 ib = idx + ai[i]; 1127 for ( j=0; j<n; j++ ) { 1128 rval = ib[j]; 1129 z[rval] += *v++ * x1; 1130 } 1131 } 1132 break; 1133 case 2: 1134 for ( i=0; i<mbs; i++ ) { 1135 n = ii[1] - ii[0]; ii++; 1136 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1137 ib = idx + ai[i]; 1138 for ( j=0; j<n; j++ ) { 1139 rval = ib[j]*2; 1140 z[rval++] += v[0]*x1 + v[1]*x2; 1141 z[rval++] += v[2]*x1 + v[3]*x2; 1142 v += 4; 1143 } 1144 } 1145 break; 1146 case 3: 1147 for ( i=0; i<mbs; i++ ) { 1148 n = ii[1] - ii[0]; ii++; 1149 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1150 ib = idx + ai[i]; 1151 for ( j=0; j<n; j++ ) { 1152 rval = ib[j]*3; 1153 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1154 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1155 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1156 v += 9; 1157 } 1158 } 1159 break; 1160 case 4: 1161 for ( i=0; i<mbs; i++ ) { 1162 n = ii[1] - ii[0]; ii++; 1163 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1164 ib = idx + ai[i]; 1165 for ( j=0; j<n; j++ ) { 1166 rval = ib[j]*4; 1167 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1168 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1169 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1170 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1171 v += 16; 1172 } 1173 } 1174 break; 1175 case 5: 1176 for ( i=0; i<mbs; i++ ) { 1177 n = ii[1] - ii[0]; ii++; 1178 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1179 x4 = xb[3]; x5 = xb[4]; 1180 ib = idx + ai[i]; 1181 for ( j=0; j<n; j++ ) { 1182 rval = ib[j]*5; 1183 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1184 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1185 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1186 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1187 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1188 v += 25; 1189 } 1190 } 1191 break; 1192 /* block sizes larger then 5 by 5 are handled by BLAS */ 1193 default: { 1194 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1195 if (!a->mult_work) { 1196 k = PetscMax(a->m,a->n); 1197 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1198 CHKPTRQ(a->mult_work); 1199 } 1200 work = a->mult_work; 1201 for ( i=0; i<mbs; i++ ) { 1202 n = ii[1] - ii[0]; ii++; 1203 ncols = n*bs; 1204 PetscMemzero(work,ncols*sizeof(Scalar)); 1205 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1206 v += n*bs2; 1207 x += bs; 1208 workt = work; 1209 for ( j=0; j<n; j++ ) { 1210 zb = z + bs*(*idx++); 1211 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1212 workt += bs; 1213 } 1214 } 1215 } 1216 } 1217 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1218 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1219 PLogFlops(2*a->nz*a->bs2); 1220 return 0; 1221 } 1222 1223 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1224 { 1225 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1226 if (nz) *nz = a->bs2*a->nz; 1227 if (nza) *nza = a->maxnz; 1228 if (mem) *mem = (int)A->mem; 1229 return 0; 1230 } 1231 1232 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1233 { 1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1235 1236 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1237 1238 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1239 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1240 (a->nz != b->nz)) { 1241 *flg = PETSC_FALSE; return 0; 1242 } 1243 1244 /* if the a->i are the same */ 1245 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1246 *flg = PETSC_FALSE; return 0; 1247 } 1248 1249 /* if a->j are the same */ 1250 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1251 *flg = PETSC_FALSE; return 0; 1252 } 1253 1254 /* if a->a are the same */ 1255 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1256 *flg = PETSC_FALSE; return 0; 1257 } 1258 *flg = PETSC_TRUE; 1259 return 0; 1260 1261 } 1262 1263 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1264 { 1265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1266 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1267 Scalar *x, zero = 0.0,*aa,*aa_j; 1268 1269 bs = a->bs; 1270 aa = a->a; 1271 ai = a->i; 1272 aj = a->j; 1273 ambs = a->mbs; 1274 bs2 = a->bs2; 1275 1276 VecSet(&zero,v); 1277 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1278 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1279 for ( i=0; i<ambs; i++ ) { 1280 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1281 if (aj[j] == i) { 1282 row = i*bs; 1283 aa_j = aa+j*bs2; 1284 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1285 break; 1286 } 1287 } 1288 } 1289 return 0; 1290 } 1291 1292 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1293 { 1294 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1295 Scalar *l,*r,x,*v,*aa,*li,*ri; 1296 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1297 1298 ai = a->i; 1299 aj = a->j; 1300 aa = a->a; 1301 m = a->m; 1302 n = a->n; 1303 bs = a->bs; 1304 mbs = a->mbs; 1305 bs2 = a->bs2; 1306 if (ll) { 1307 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1308 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1309 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1310 M = ai[i+1] - ai[i]; 1311 li = l + i*bs; 1312 v = aa + bs2*ai[i]; 1313 for ( j=0; j<M; j++ ) { /* for each block */ 1314 for ( k=0; k<bs2; k++ ) { 1315 (*v++) *= li[k%bs]; 1316 } 1317 } 1318 } 1319 } 1320 1321 if (rr) { 1322 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1323 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1324 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1325 M = ai[i+1] - ai[i]; 1326 v = aa + bs2*ai[i]; 1327 for ( j=0; j<M; j++ ) { /* for each block */ 1328 ri = r + bs*aj[ai[i]+j]; 1329 for ( k=0; k<bs; k++ ) { 1330 x = ri[k]; 1331 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1332 } 1333 } 1334 } 1335 } 1336 return 0; 1337 } 1338 1339 1340 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1341 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1342 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1343 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1344 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1345 1346 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1347 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1348 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1349 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1350 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1351 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1352 1353 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1354 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1355 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1356 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1357 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1358 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1359 1360 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1361 { 1362 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1363 Scalar *v = a->a; 1364 double sum = 0.0; 1365 int i,nz=a->nz,bs2=a->bs2; 1366 1367 if (type == NORM_FROBENIUS) { 1368 for (i=0; i< bs2*nz; i++ ) { 1369 #if defined(PETSC_COMPLEX) 1370 sum += real(conj(*v)*(*v)); v++; 1371 #else 1372 sum += (*v)*(*v); v++; 1373 #endif 1374 } 1375 *norm = sqrt(sum); 1376 } 1377 else { 1378 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1379 } 1380 return 0; 1381 } 1382 1383 /* 1384 note: This can only work for identity for row and col. It would 1385 be good to check this and otherwise generate an error. 1386 */ 1387 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1388 { 1389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1390 Mat outA; 1391 int ierr; 1392 1393 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1394 1395 outA = inA; 1396 inA->factor = FACTOR_LU; 1397 a->row = row; 1398 a->col = col; 1399 1400 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1401 1402 if (!a->diag) { 1403 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1404 } 1405 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1406 return 0; 1407 } 1408 1409 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1410 { 1411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1412 int one = 1, totalnz = a->bs2*a->nz; 1413 BLscal_( &totalnz, alpha, a->a, &one ); 1414 PLogFlops(totalnz); 1415 return 0; 1416 } 1417 1418 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1419 { 1420 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1421 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1422 int *ai = a->i, *ailen = a->ilen; 1423 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1424 Scalar *ap, *aa = a->a, zero = 0.0; 1425 1426 for ( k=0; k<m; k++ ) { /* loop over rows */ 1427 row = im[k]; brow = row/bs; 1428 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1429 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1430 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1431 nrow = ailen[brow]; 1432 for ( l=0; l<n; l++ ) { /* loop over columns */ 1433 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1434 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1435 col = in[l] ; 1436 bcol = col/bs; 1437 cidx = col%bs; 1438 ridx = row%bs; 1439 high = nrow; 1440 low = 0; /* assume unsorted */ 1441 while (high-low > 5) { 1442 t = (low+high)/2; 1443 if (rp[t] > bcol) high = t; 1444 else low = t; 1445 } 1446 for ( i=low; i<high; i++ ) { 1447 if (rp[i] > bcol) break; 1448 if (rp[i] == bcol) { 1449 *v++ = ap[bs2*i+bs*cidx+ridx]; 1450 goto finished; 1451 } 1452 } 1453 *v++ = zero; 1454 finished:; 1455 } 1456 } 1457 return 0; 1458 } 1459 1460 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1461 { 1462 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1463 *bs = baij->bs; 1464 return 0; 1465 } 1466 1467 /* idx should be of length atlease bs */ 1468 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1469 { 1470 int i,row; 1471 row = idx[0]; 1472 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1473 1474 for ( i=1; i<bs; i++ ) { 1475 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1476 } 1477 *flg = PETSC_TRUE; 1478 return 0; 1479 } 1480 1481 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1482 { 1483 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1484 IS is_local; 1485 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1486 PetscTruth flg; 1487 Scalar zero = 0.0,*aa; 1488 1489 /* Make a copy of the IS and sort it */ 1490 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1491 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1492 ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1493 ierr = ISSort(is_local); CHKERRQ(ierr); 1494 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1495 1496 i = 0; 1497 while (i < is_n) { 1498 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1499 flg = PETSC_FALSE; 1500 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1501 if (flg) { /* There exists a block of rows to be Zerowed */ 1502 baij->ilen[rows[i]/bs] = 0; 1503 i += bs; 1504 } else { /* Zero out only the requested row */ 1505 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1506 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1507 for ( j=0; j<count; j++ ) { 1508 aa[0] = zero; 1509 aa+=bs; 1510 } 1511 i++; 1512 } 1513 } 1514 if (diag) { 1515 for ( j=0; j<is_n; j++ ) { 1516 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1517 } 1518 } 1519 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1520 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1521 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1524 1525 return 0; 1526 } 1527 1528 /* -------------------------------------------------------------------*/ 1529 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1530 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1531 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1532 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1533 MatSolve_SeqBAIJ_N,0, 1534 0,0, 1535 MatLUFactor_SeqBAIJ,0, 1536 0, 1537 MatTranspose_SeqBAIJ, 1538 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1539 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1540 0,MatAssemblyEnd_SeqBAIJ, 1541 0, 1542 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1543 MatGetReordering_SeqBAIJ, 1544 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1545 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1546 MatILUFactorSymbolic_SeqBAIJ,0, 1547 0,0,/* MatConvert_SeqBAIJ */ 0, 1548 MatGetSubMatrix_SeqBAIJ,0, 1549 MatConvertSameType_SeqBAIJ,0,0, 1550 MatILUFactor_SeqBAIJ,0,0, 1551 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1552 MatGetValues_SeqBAIJ,0, 1553 0,MatScale_SeqBAIJ, 1554 0,0,0,MatGetBlockSize_SeqBAIJ}; 1555 1556 /*@C 1557 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1558 compressed row) format. For good matrix assembly performance the 1559 user should preallocate the matrix storage by setting the parameter nz 1560 (or the array nzz). By setting these parameters accurately, performance 1561 during matrix assembly can be increased by more than a factor of 50. 1562 1563 Input Parameters: 1564 . comm - MPI communicator, set to MPI_COMM_SELF 1565 . bs - size of block 1566 . m - number of rows 1567 . n - number of columns 1568 . nz - number of block nonzeros per block row (same for all rows) 1569 . nzz - array containing the number of block nonzeros in the various block rows 1570 (possibly different for each block row) or PETSC_NULL 1571 1572 Output Parameter: 1573 . A - the matrix 1574 1575 Notes: 1576 The block AIJ format is fully compatible with standard Fortran 77 1577 storage. That is, the stored row and column indices can begin at 1578 either one (as in Fortran) or zero. See the users' manual for details. 1579 1580 Specify the preallocated storage with either nz or nnz (not both). 1581 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1582 allocation. For additional details, see the users manual chapter on 1583 matrices and the file $(PETSC_DIR)/Performance. 1584 1585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1586 @*/ 1587 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1588 { 1589 Mat B; 1590 Mat_SeqBAIJ *b; 1591 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1592 1593 if (mbs*bs!=m || nbs*bs!=n) 1594 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1595 1596 *A = 0; 1597 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1598 PLogObjectCreate(B); 1599 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1600 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1601 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1602 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1603 if (!flg) { 1604 switch (bs) { 1605 case 1: 1606 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1607 B->ops.solve = MatSolve_SeqBAIJ_1; 1608 B->ops.mult = MatMult_SeqBAIJ_1; 1609 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1610 break; 1611 case 2: 1612 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1613 B->ops.solve = MatSolve_SeqBAIJ_2; 1614 B->ops.mult = MatMult_SeqBAIJ_2; 1615 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1616 break; 1617 case 3: 1618 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1619 B->ops.solve = MatSolve_SeqBAIJ_3; 1620 B->ops.mult = MatMult_SeqBAIJ_3; 1621 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1622 break; 1623 case 4: 1624 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1625 B->ops.solve = MatSolve_SeqBAIJ_4; 1626 B->ops.mult = MatMult_SeqBAIJ_4; 1627 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1628 break; 1629 case 5: 1630 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1631 B->ops.solve = MatSolve_SeqBAIJ_5; 1632 B->ops.mult = MatMult_SeqBAIJ_5; 1633 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1634 break; 1635 } 1636 } 1637 B->destroy = MatDestroy_SeqBAIJ; 1638 B->view = MatView_SeqBAIJ; 1639 B->factor = 0; 1640 B->lupivotthreshold = 1.0; 1641 b->row = 0; 1642 b->col = 0; 1643 b->reallocs = 0; 1644 1645 b->m = m; B->m = m; B->M = m; 1646 b->n = n; B->n = n; B->N = n; 1647 b->mbs = mbs; 1648 b->nbs = nbs; 1649 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1650 if (nnz == PETSC_NULL) { 1651 if (nz == PETSC_DEFAULT) nz = 5; 1652 else if (nz <= 0) nz = 1; 1653 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1654 nz = nz*mbs; 1655 } 1656 else { 1657 nz = 0; 1658 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1659 } 1660 1661 /* allocate the matrix space */ 1662 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1663 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1664 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1665 b->j = (int *) (b->a + nz*bs2); 1666 PetscMemzero(b->j,nz*sizeof(int)); 1667 b->i = b->j + nz; 1668 b->singlemalloc = 1; 1669 1670 b->i[0] = 0; 1671 for (i=1; i<mbs+1; i++) { 1672 b->i[i] = b->i[i-1] + b->imax[i-1]; 1673 } 1674 1675 /* b->ilen will count nonzeros in each block row so far. */ 1676 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1677 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1678 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1679 1680 b->bs = bs; 1681 b->bs2 = bs2; 1682 b->mbs = mbs; 1683 b->nz = 0; 1684 b->maxnz = nz*bs2; 1685 b->sorted = 0; 1686 b->roworiented = 1; 1687 b->nonew = 0; 1688 b->diag = 0; 1689 b->solve_work = 0; 1690 b->mult_work = 0; 1691 b->spptr = 0; 1692 *A = B; 1693 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1694 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1695 return 0; 1696 } 1697 1698 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1699 { 1700 Mat C; 1701 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1702 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1703 1704 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1705 1706 *B = 0; 1707 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1708 PLogObjectCreate(C); 1709 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1710 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1711 C->destroy = MatDestroy_SeqBAIJ; 1712 C->view = MatView_SeqBAIJ; 1713 C->factor = A->factor; 1714 c->row = 0; 1715 c->col = 0; 1716 C->assembled = PETSC_TRUE; 1717 1718 c->m = C->m = a->m; 1719 c->n = C->n = a->n; 1720 C->M = a->m; 1721 C->N = a->n; 1722 1723 c->bs = a->bs; 1724 c->bs2 = a->bs2; 1725 c->mbs = a->mbs; 1726 c->nbs = a->nbs; 1727 1728 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1729 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1730 for ( i=0; i<mbs; i++ ) { 1731 c->imax[i] = a->imax[i]; 1732 c->ilen[i] = a->ilen[i]; 1733 } 1734 1735 /* allocate the matrix space */ 1736 c->singlemalloc = 1; 1737 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1738 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1739 c->j = (int *) (c->a + nz*bs2); 1740 c->i = c->j + nz; 1741 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1742 if (mbs > 0) { 1743 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1744 if (cpvalues == COPY_VALUES) { 1745 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1746 } 1747 } 1748 1749 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1750 c->sorted = a->sorted; 1751 c->roworiented = a->roworiented; 1752 c->nonew = a->nonew; 1753 1754 if (a->diag) { 1755 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1756 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1757 for ( i=0; i<mbs; i++ ) { 1758 c->diag[i] = a->diag[i]; 1759 } 1760 } 1761 else c->diag = 0; 1762 c->nz = a->nz; 1763 c->maxnz = a->maxnz; 1764 c->solve_work = 0; 1765 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1766 c->mult_work = 0; 1767 *B = C; 1768 return 0; 1769 } 1770 1771 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1772 { 1773 Mat_SeqBAIJ *a; 1774 Mat B; 1775 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1776 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1777 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1778 int *masked, nmask,tmp,bs2,ishift; 1779 Scalar *aa; 1780 MPI_Comm comm = ((PetscObject) viewer)->comm; 1781 1782 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1783 bs2 = bs*bs; 1784 1785 MPI_Comm_size(comm,&size); 1786 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1787 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1788 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1789 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1790 M = header[1]; N = header[2]; nz = header[3]; 1791 1792 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1793 1794 /* 1795 This code adds extra rows to make sure the number of rows is 1796 divisible by the blocksize 1797 */ 1798 mbs = M/bs; 1799 extra_rows = bs - M + bs*(mbs); 1800 if (extra_rows == bs) extra_rows = 0; 1801 else mbs++; 1802 if (extra_rows) { 1803 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1804 } 1805 1806 /* read in row lengths */ 1807 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1808 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1809 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1810 1811 /* read in column indices */ 1812 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1813 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1814 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1815 1816 /* loop over row lengths determining block row lengths */ 1817 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1818 PetscMemzero(browlengths,mbs*sizeof(int)); 1819 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1820 PetscMemzero(mask,mbs*sizeof(int)); 1821 masked = mask + mbs; 1822 rowcount = 0; nzcount = 0; 1823 for ( i=0; i<mbs; i++ ) { 1824 nmask = 0; 1825 for ( j=0; j<bs; j++ ) { 1826 kmax = rowlengths[rowcount]; 1827 for ( k=0; k<kmax; k++ ) { 1828 tmp = jj[nzcount++]/bs; 1829 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1830 } 1831 rowcount++; 1832 } 1833 browlengths[i] += nmask; 1834 /* zero out the mask elements we set */ 1835 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1836 } 1837 1838 /* create our matrix */ 1839 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1840 CHKERRQ(ierr); 1841 B = *A; 1842 a = (Mat_SeqBAIJ *) B->data; 1843 1844 /* set matrix "i" values */ 1845 a->i[0] = 0; 1846 for ( i=1; i<= mbs; i++ ) { 1847 a->i[i] = a->i[i-1] + browlengths[i-1]; 1848 a->ilen[i-1] = browlengths[i-1]; 1849 } 1850 a->nz = 0; 1851 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1852 1853 /* read in nonzero values */ 1854 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1855 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1856 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1857 1858 /* set "a" and "j" values into matrix */ 1859 nzcount = 0; jcount = 0; 1860 for ( i=0; i<mbs; i++ ) { 1861 nzcountb = nzcount; 1862 nmask = 0; 1863 for ( j=0; j<bs; j++ ) { 1864 kmax = rowlengths[i*bs+j]; 1865 for ( k=0; k<kmax; k++ ) { 1866 tmp = jj[nzcount++]/bs; 1867 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1868 } 1869 rowcount++; 1870 } 1871 /* sort the masked values */ 1872 PetscSortInt(nmask,masked); 1873 1874 /* set "j" values into matrix */ 1875 maskcount = 1; 1876 for ( j=0; j<nmask; j++ ) { 1877 a->j[jcount++] = masked[j]; 1878 mask[masked[j]] = maskcount++; 1879 } 1880 /* set "a" values into matrix */ 1881 ishift = bs2*a->i[i]; 1882 for ( j=0; j<bs; j++ ) { 1883 kmax = rowlengths[i*bs+j]; 1884 for ( k=0; k<kmax; k++ ) { 1885 tmp = jj[nzcountb]/bs ; 1886 block = mask[tmp] - 1; 1887 point = jj[nzcountb] - bs*tmp; 1888 idx = ishift + bs2*block + j + bs*point; 1889 a->a[idx] = aa[nzcountb++]; 1890 } 1891 } 1892 /* zero out the mask elements we set */ 1893 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1894 } 1895 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1896 1897 PetscFree(rowlengths); 1898 PetscFree(browlengths); 1899 PetscFree(aa); 1900 PetscFree(jj); 1901 PetscFree(mask); 1902 1903 B->assembled = PETSC_TRUE; 1904 1905 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1906 if (flg) { 1907 Viewer tviewer; 1908 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1909 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1910 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1911 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1912 } 1913 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1914 if (flg) { 1915 Viewer tviewer; 1916 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1917 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1918 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1919 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1920 } 1921 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1922 if (flg) { 1923 Viewer tviewer; 1924 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1925 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1926 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1927 } 1928 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1929 if (flg) { 1930 Viewer tviewer; 1931 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1932 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1933 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1934 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1935 } 1936 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1937 if (flg) { 1938 Viewer tviewer; 1939 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1940 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1941 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1942 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1943 } 1944 return 0; 1945 } 1946 1947 1948 1949