1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 default: 183 SETERRQ(PETSC_ERR_SUP,"unknown option"); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 190 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 191 { 192 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 193 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 194 MatScalar *aa,*aa_i; 195 PetscScalar *v_i; 196 197 PetscFunctionBegin; 198 bs = a->bs; 199 ai = a->i; 200 aj = a->j; 201 aa = a->a; 202 bs2 = a->bs2; 203 204 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 205 206 bn = row/bs; /* Block number */ 207 bp = row % bs; /* Block position */ 208 M = ai[bn+1] - ai[bn]; 209 *ncols = bs*M; 210 211 if (v) { 212 *v = 0; 213 if (*ncols) { 214 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 215 for (i=0; i<M; i++) { /* for each block in the block row */ 216 v_i = *v + i*bs; 217 aa_i = aa + bs2*(ai[bn] + i); 218 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 219 } 220 } 221 } 222 223 if (cols) { 224 *cols = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 cols_i = *cols + i*bs; 229 itmp = bs*aj[ai[bn] + i]; 230 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 231 } 232 } 233 } 234 235 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 236 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 237 #ifdef column_search 238 v_i = *v + M*bs; 239 cols_i = *cols + M*bs; 240 for (i=0; i<bn; i++){ /* for each block row */ 241 M = ai[i+1] - ai[i]; 242 for (j=0; j<M; j++){ 243 itmp = aj[ai[i] + j]; /* block column value */ 244 if (itmp == bn){ 245 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 246 for (k=0; k<bs; k++) { 247 *cols_i++ = i*bs+k; 248 *v_i++ = aa_i[k]; 249 } 250 *ncols += bs; 251 break; 252 } 253 } 254 } 255 #endif 256 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 262 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 263 { 264 int ierr; 265 266 PetscFunctionBegin; 267 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 268 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 274 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 275 { 276 int ierr; 277 PetscFunctionBegin; 278 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 284 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 285 { 286 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 287 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 288 PetscScalar *aa; 289 FILE *file; 290 291 PetscFunctionBegin; 292 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 293 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 294 col_lens[0] = MAT_FILE_COOKIE; 295 296 col_lens[1] = A->m; 297 col_lens[2] = A->m; 298 col_lens[3] = a->s_nz*bs2; 299 300 /* store lengths of each row and write (including header) to file */ 301 count = 0; 302 for (i=0; i<a->mbs; i++) { 303 for (j=0; j<bs; j++) { 304 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 305 } 306 } 307 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 308 ierr = PetscFree(col_lens);CHKERRQ(ierr); 309 310 /* store column indices (zero start index) */ 311 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 312 count = 0; 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 jj[count++] = bs*a->j[k] + l; 318 } 319 } 320 } 321 } 322 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 323 ierr = PetscFree(jj);CHKERRQ(ierr); 324 325 /* store nonzero values */ 326 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 327 count = 0; 328 for (i=0; i<a->mbs; i++) { 329 for (j=0; j<bs; j++) { 330 for (k=a->i[i]; k<a->i[i+1]; k++) { 331 for (l=0; l<bs; l++) { 332 aa[count++] = a->a[bs2*k + l*bs + j]; 333 } 334 } 335 } 336 } 337 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 338 ierr = PetscFree(aa);CHKERRQ(ierr); 339 340 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 341 if (file) { 342 fprintf(file,"-matload_block_size %d\n",a->bs); 343 } 344 PetscFunctionReturn(0); 345 } 346 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 349 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 350 { 351 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 352 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 353 char *name; 354 PetscViewerFormat format; 355 356 PetscFunctionBegin; 357 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 358 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 359 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 360 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 361 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 362 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 363 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 365 for (i=0; i<a->mbs; i++) { 366 for (j=0; j<bs; j++) { 367 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 368 for (k=a->i[i]; k<a->i[i+1]; k++) { 369 for (l=0; l<bs; l++) { 370 #if defined(PETSC_USE_COMPLEX) 371 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 372 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 373 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 374 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } 380 #else 381 if (a->a[bs2*k + l*bs + j] != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 383 } 384 #endif 385 } 386 } 387 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 #if defined(PETSC_USE_COMPLEX) 399 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } 408 #else 409 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 410 #endif 411 } 412 } 413 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 414 } 415 } 416 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 417 } 418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 424 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 425 { 426 Mat A = (Mat) Aa; 427 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 428 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 429 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 430 MatScalar *aa; 431 MPI_Comm comm; 432 PetscViewer viewer; 433 434 PetscFunctionBegin; 435 /* 436 This is nasty. If this is called from an originally parallel matrix 437 then all processes call this,but only the first has the matrix so the 438 rest should return immediately. 439 */ 440 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 441 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 442 if (rank) PetscFunctionReturn(0); 443 444 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 445 446 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 447 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 448 449 /* loop over matrix elements drawing boxes */ 450 color = PETSC_DRAW_BLUE; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) >= 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 color = PETSC_DRAW_CYAN; 465 for (i=0,row=0; i<mbs; i++,row+=bs) { 466 for (j=a->i[i]; j<a->i[i+1]; j++) { 467 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 468 x_l = a->j[j]*bs; x_r = x_l + 1.0; 469 aa = a->a + j*bs2; 470 for (k=0; k<bs; k++) { 471 for (l=0; l<bs; l++) { 472 if (PetscRealPart(*aa++) != 0.) continue; 473 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 474 } 475 } 476 } 477 } 478 479 color = PETSC_DRAW_RED; 480 for (i=0,row=0; i<mbs; i++,row+=bs) { 481 for (j=a->i[i]; j<a->i[i+1]; j++) { 482 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 483 x_l = a->j[j]*bs; x_r = x_l + 1.0; 484 aa = a->a + j*bs2; 485 for (k=0; k<bs; k++) { 486 for (l=0; l<bs; l++) { 487 if (PetscRealPart(*aa++) <= 0.) continue; 488 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 489 } 490 } 491 } 492 } 493 PetscFunctionReturn(0); 494 } 495 496 #undef __FUNCT__ 497 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 498 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 499 { 500 int ierr; 501 PetscReal xl,yl,xr,yr,w,h; 502 PetscDraw draw; 503 PetscTruth isnull; 504 505 PetscFunctionBegin; 506 507 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 508 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 509 510 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 511 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 512 xr += w; yr += h; xl = -w; yl = -h; 513 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 514 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 515 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 516 PetscFunctionReturn(0); 517 } 518 519 #undef __FUNCT__ 520 #define __FUNCT__ "MatView_SeqSBAIJ" 521 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 522 { 523 int ierr; 524 PetscTruth issocket,isascii,isbinary,isdraw; 525 526 PetscFunctionBegin; 527 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 528 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 529 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 531 if (issocket) { 532 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 533 } else if (isascii){ 534 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 535 } else if (isbinary) { 536 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 537 } else if (isdraw) { 538 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 539 } else { 540 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 541 } 542 PetscFunctionReturn(0); 543 } 544 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 548 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 549 { 550 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 551 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 552 int *ai = a->i,*ailen = a->ilen; 553 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 554 MatScalar *ap,*aa = a->a,zero = 0.0; 555 556 PetscFunctionBegin; 557 for (k=0; k<m; k++) { /* loop over rows */ 558 row = im[k]; brow = row/bs; 559 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 560 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 561 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 562 nrow = ailen[brow]; 563 for (l=0; l<n; l++) { /* loop over columns */ 564 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 565 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 566 col = in[l] ; 567 bcol = col/bs; 568 cidx = col%bs; 569 ridx = row%bs; 570 high = nrow; 571 low = 0; /* assume unsorted */ 572 while (high-low > 5) { 573 t = (low+high)/2; 574 if (rp[t] > bcol) high = t; 575 else low = t; 576 } 577 for (i=low; i<high; i++) { 578 if (rp[i] > bcol) break; 579 if (rp[i] == bcol) { 580 *v++ = ap[bs2*i+bs*cidx+ridx]; 581 goto finished; 582 } 583 } 584 *v++ = zero; 585 finished:; 586 } 587 } 588 PetscFunctionReturn(0); 589 } 590 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 594 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 595 { 596 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 597 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 598 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 599 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 600 PetscTruth roworiented=a->roworiented; 601 MatScalar *value = v,*ap,*aa = a->a,*bap; 602 603 PetscFunctionBegin; 604 if (roworiented) { 605 stepval = (n-1)*bs; 606 } else { 607 stepval = (m-1)*bs; 608 } 609 for (k=0; k<m; k++) { /* loop over added rows */ 610 row = im[k]; 611 if (row < 0) continue; 612 #if defined(PETSC_USE_BOPT_g) 613 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 614 #endif 615 rp = aj + ai[row]; 616 ap = aa + bs2*ai[row]; 617 rmax = imax[row]; 618 nrow = ailen[row]; 619 low = 0; 620 for (l=0; l<n; l++) { /* loop over added columns */ 621 if (in[l] < 0) continue; 622 #if defined(PETSC_USE_BOPT_g) 623 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 624 #endif 625 col = in[l]; 626 if (col < row) continue; /* ignore lower triangular block */ 627 if (roworiented) { 628 value = v + k*(stepval+bs)*bs + l*bs; 629 } else { 630 value = v + l*(stepval+bs)*bs + k*bs; 631 } 632 if (!sorted) low = 0; high = nrow; 633 while (high-low > 7) { 634 t = (low+high)/2; 635 if (rp[t] > col) high = t; 636 else low = t; 637 } 638 for (i=low; i<high; i++) { 639 if (rp[i] > col) break; 640 if (rp[i] == col) { 641 bap = ap + bs2*i; 642 if (roworiented) { 643 if (is == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=ii; jj<bs2; jj+=bs) { 646 bap[jj] += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=ii; jj<bs2; jj+=bs) { 652 bap[jj] = *value++; 653 } 654 } 655 } 656 } else { 657 if (is == ADD_VALUES) { 658 for (ii=0; ii<bs; ii++,value+=stepval) { 659 for (jj=0; jj<bs; jj++) { 660 *bap++ += *value++; 661 } 662 } 663 } else { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=0; jj<bs; jj++) { 666 *bap++ = *value++; 667 } 668 } 669 } 670 } 671 goto noinsert2; 672 } 673 } 674 if (nonew == 1) goto noinsert2; 675 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 676 if (nrow >= rmax) { 677 /* there is no extra room in row, therefore enlarge */ 678 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 679 MatScalar *new_a; 680 681 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 682 683 /* malloc new storage space */ 684 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 685 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 686 new_j = (int*)(new_a + bs2*new_nz); 687 new_i = new_j + new_nz; 688 689 /* copy over old data into new slots */ 690 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 691 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 692 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 693 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 694 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 695 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 696 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 697 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 698 /* free up old matrix storage */ 699 ierr = PetscFree(a->a);CHKERRQ(ierr); 700 if (!a->singlemalloc) { 701 ierr = PetscFree(a->i);CHKERRQ(ierr); 702 ierr = PetscFree(a->j);CHKERRQ(ierr); 703 } 704 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 705 a->singlemalloc = PETSC_TRUE; 706 707 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 708 rmax = imax[row] = imax[row] + CHUNKSIZE; 709 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 710 a->s_maxnz += bs2*CHUNKSIZE; 711 a->reallocs++; 712 a->s_nz++; 713 } 714 N = nrow++ - 1; 715 /* shift up all the later entries in this row */ 716 for (ii=N; ii>=i; ii--) { 717 rp[ii+1] = rp[ii]; 718 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 719 } 720 if (N >= i) { 721 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 rp[i] = col; 724 bap = ap + bs2*i; 725 if (roworiented) { 726 for (ii=0; ii<bs; ii++,value+=stepval) { 727 for (jj=ii; jj<bs2; jj+=bs) { 728 bap[jj] = *value++; 729 } 730 } 731 } else { 732 for (ii=0; ii<bs; ii++,value+=stepval) { 733 for (jj=0; jj<bs; jj++) { 734 *bap++ = *value++; 735 } 736 } 737 } 738 noinsert2:; 739 low = i; 740 } 741 ailen[row] = nrow; 742 } 743 PetscFunctionReturn(0); 744 } 745 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 748 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 749 { 750 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 751 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 752 int m = A->m,*ip,N,*ailen = a->ilen; 753 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 754 MatScalar *aa = a->a,*ap; 755 #if defined(PETSC_HAVE_SPOOLES) 756 PetscTruth flag; 757 #endif 758 759 PetscFunctionBegin; 760 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 761 762 if (m) rmax = ailen[0]; 763 for (i=1; i<mbs; i++) { 764 /* move each row back by the amount of empty slots (fshift) before it*/ 765 fshift += imax[i-1] - ailen[i-1]; 766 rmax = PetscMax(rmax,ailen[i]); 767 if (fshift) { 768 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 769 N = ailen[i]; 770 for (j=0; j<N; j++) { 771 ip[j-fshift] = ip[j]; 772 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 773 } 774 } 775 ai[i] = ai[i-1] + ailen[i-1]; 776 } 777 if (mbs) { 778 fshift += imax[mbs-1] - ailen[mbs-1]; 779 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 780 } 781 /* reset ilen and imax for each row */ 782 for (i=0; i<mbs; i++) { 783 ailen[i] = imax[i] = ai[i+1] - ai[i]; 784 } 785 a->s_nz = ai[mbs]; 786 787 /* diagonals may have moved, reset it */ 788 if (a->diag) { 789 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 790 } 791 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 792 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 793 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 794 a->reallocs); 795 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 796 a->reallocs = 0; 797 A->info.nz_unneeded = (PetscReal)fshift*bs2; 798 799 #if defined(PETSC_HAVE_SPOOLES) 800 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 801 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 802 #endif 803 804 PetscFunctionReturn(0); 805 } 806 807 /* 808 This function returns an array of flags which indicate the locations of contiguous 809 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 810 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 811 Assume: sizes should be long enough to hold all the values. 812 */ 813 #undef __FUNCT__ 814 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 815 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 816 { 817 int i,j,k,row; 818 PetscTruth flg; 819 820 PetscFunctionBegin; 821 for (i=0,j=0; i<n; j++) { 822 row = idx[i]; 823 if (row%bs!=0) { /* Not the begining of a block */ 824 sizes[j] = 1; 825 i++; 826 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 827 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 828 i++; 829 } else { /* Begining of the block, so check if the complete block exists */ 830 flg = PETSC_TRUE; 831 for (k=1; k<bs; k++) { 832 if (row+k != idx[i+k]) { /* break in the block */ 833 flg = PETSC_FALSE; 834 break; 835 } 836 } 837 if (flg == PETSC_TRUE) { /* No break in the bs */ 838 sizes[j] = bs; 839 i+= bs; 840 } else { 841 sizes[j] = 1; 842 i++; 843 } 844 } 845 } 846 *bs_max = j; 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 852 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 853 { 854 PetscFunctionBegin; 855 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 856 } 857 858 /* Only add/insert a(i,j) with i<=j (blocks). 859 Any a(i,j) with i>j input by user is ingored. 860 */ 861 862 #undef __FUNCT__ 863 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 864 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 865 { 866 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 867 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 868 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 869 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 870 int ridx,cidx,bs2=a->bs2,ierr; 871 MatScalar *ap,value,*aa=a->a,*bap; 872 873 PetscFunctionBegin; 874 875 for (k=0; k<m; k++) { /* loop over added rows */ 876 row = im[k]; /* row number */ 877 brow = row/bs; /* block row number */ 878 if (row < 0) continue; 879 #if defined(PETSC_USE_BOPT_g) 880 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 881 #endif 882 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 883 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 884 rmax = imax[brow]; /* maximum space allocated for this row */ 885 nrow = ailen[brow]; /* actual length of this row */ 886 low = 0; 887 888 for (l=0; l<n; l++) { /* loop over added columns */ 889 if (in[l] < 0) continue; 890 #if defined(PETSC_USE_BOPT_g) 891 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 892 #endif 893 col = in[l]; 894 bcol = col/bs; /* block col number */ 895 896 if (brow <= bcol){ 897 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 898 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 899 /* element value a(k,l) */ 900 if (roworiented) { 901 value = v[l + k*n]; 902 } else { 903 value = v[k + l*m]; 904 } 905 906 /* move pointer bap to a(k,l) quickly and add/insert value */ 907 if (!sorted) low = 0; high = nrow; 908 while (high-low > 7) { 909 t = (low+high)/2; 910 if (rp[t] > bcol) high = t; 911 else low = t; 912 } 913 for (i=low; i<high; i++) { 914 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 915 if (rp[i] > bcol) break; 916 if (rp[i] == bcol) { 917 bap = ap + bs2*i + bs*cidx + ridx; 918 if (is == ADD_VALUES) *bap += value; 919 else *bap = value; 920 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 921 if (brow == bcol && ridx < cidx){ 922 bap = ap + bs2*i + bs*ridx + cidx; 923 if (is == ADD_VALUES) *bap += value; 924 else *bap = value; 925 } 926 goto noinsert1; 927 } 928 } 929 930 if (nonew == 1) goto noinsert1; 931 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 932 if (nrow >= rmax) { 933 /* there is no extra room in row, therefore enlarge */ 934 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 935 MatScalar *new_a; 936 937 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 938 939 /* Malloc new storage space */ 940 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 941 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 942 new_j = (int*)(new_a + bs2*new_nz); 943 new_i = new_j + new_nz; 944 945 /* copy over old data into new slots */ 946 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 947 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 948 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 949 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 950 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 951 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 952 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 953 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 954 /* free up old matrix storage */ 955 ierr = PetscFree(a->a);CHKERRQ(ierr); 956 if (!a->singlemalloc) { 957 ierr = PetscFree(a->i);CHKERRQ(ierr); 958 ierr = PetscFree(a->j);CHKERRQ(ierr); 959 } 960 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 961 a->singlemalloc = PETSC_TRUE; 962 963 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 964 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 965 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 966 a->s_maxnz += bs2*CHUNKSIZE; 967 a->reallocs++; 968 a->s_nz++; 969 } 970 971 N = nrow++ - 1; 972 /* shift up all the later entries in this row */ 973 for (ii=N; ii>=i; ii--) { 974 rp[ii+1] = rp[ii]; 975 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 976 } 977 if (N>=i) { 978 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 979 } 980 rp[i] = bcol; 981 ap[bs2*i + bs*cidx + ridx] = value; 982 noinsert1:; 983 low = i; 984 /* } */ 985 } 986 } /* end of if .. if.. */ 987 } /* end of loop over added columns */ 988 ailen[brow] = nrow; 989 } /* end of loop over added rows */ 990 991 PetscFunctionReturn(0); 992 } 993 994 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 995 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 996 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 997 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 998 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 999 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1000 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1001 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1002 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1003 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1004 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1005 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1006 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1007 extern int MatZeroEntries_SeqSBAIJ(Mat); 1008 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1009 1010 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1025 1026 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1027 1028 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1036 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1045 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1046 1047 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1055 1056 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1064 1065 #ifdef HAVE_MatICCFactor 1066 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1069 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1070 { 1071 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1072 Mat outA; 1073 int ierr; 1074 PetscTruth row_identity,col_identity; 1075 1076 PetscFunctionBegin; 1077 /* 1078 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1079 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1080 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1081 if (!row_identity || !col_identity) { 1082 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1083 } 1084 */ 1085 1086 outA = inA; 1087 inA->factor = FACTOR_CHOLESKY; 1088 1089 if (!a->diag) { 1090 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1091 } 1092 /* 1093 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1094 for ILU(0) factorization with natural ordering 1095 */ 1096 switch (a->bs) { 1097 case 1: 1098 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1099 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1100 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1101 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1102 case 2: 1103 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1104 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1105 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1106 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1107 break; 1108 case 3: 1109 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1110 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1111 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1112 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1113 break; 1114 case 4: 1115 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1116 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1117 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1118 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1119 break; 1120 case 5: 1121 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1122 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1123 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1124 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1125 break; 1126 case 6: 1127 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1128 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1129 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1130 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1131 break; 1132 case 7: 1133 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1134 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1135 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1136 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1137 break; 1138 default: 1139 a->row = row; 1140 a->icol = col; 1141 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1142 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1143 1144 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1145 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1146 PetscLogObjectParent(inA,a->icol); 1147 1148 if (!a->solve_work) { 1149 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1150 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1151 } 1152 } 1153 1154 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1155 1156 PetscFunctionReturn(0); 1157 } 1158 #endif 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1162 int MatPrintHelp_SeqSBAIJ(Mat A) 1163 { 1164 static PetscTruth called = PETSC_FALSE; 1165 MPI_Comm comm = A->comm; 1166 int ierr; 1167 1168 PetscFunctionBegin; 1169 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1170 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1171 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 EXTERN_C_BEGIN 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1178 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1179 { 1180 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1181 int i,nz,n; 1182 1183 PetscFunctionBegin; 1184 nz = baij->s_maxnz; 1185 n = mat->n; 1186 for (i=0; i<nz; i++) { 1187 baij->j[i] = indices[i]; 1188 } 1189 baij->s_nz = nz; 1190 for (i=0; i<n; i++) { 1191 baij->ilen[i] = baij->imax[i]; 1192 } 1193 1194 PetscFunctionReturn(0); 1195 } 1196 EXTERN_C_END 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1200 /*@ 1201 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1202 in the matrix. 1203 1204 Input Parameters: 1205 + mat - the SeqSBAIJ matrix 1206 - indices - the column indices 1207 1208 Level: advanced 1209 1210 Notes: 1211 This can be called if you have precomputed the nonzero structure of the 1212 matrix and want to provide it to the matrix object to improve the performance 1213 of the MatSetValues() operation. 1214 1215 You MUST have set the correct numbers of nonzeros per row in the call to 1216 MatCreateSeqSBAIJ(). 1217 1218 MUST be called before any calls to MatSetValues() 1219 1220 .seealso: MatCreateSeqSBAIJ 1221 @*/ 1222 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1223 { 1224 int ierr,(*f)(Mat,int *); 1225 1226 PetscFunctionBegin; 1227 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1228 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1229 if (f) { 1230 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1231 } else { 1232 SETERRQ(1,"Wrong type of matrix to set column indices"); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1239 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1240 { 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #undef __FUNCT__ 1249 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1250 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar **array) 1251 { 1252 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1253 PetscFunctionBegin; 1254 *array = a->a; 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1260 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar **array) 1261 { 1262 PetscFunctionBegin; 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #include "petscblaslapack.h" 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1269 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1270 { 1271 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1272 int ierr,one=1,i,bs=y->bs,bs2,j; 1273 1274 PetscFunctionBegin; 1275 if (str == SAME_NONZERO_PATTERN) { 1276 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1277 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1278 if (y->xtoy && y->XtoY != X) { 1279 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1280 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1281 } 1282 if (!y->xtoy) { /* get xtoy */ 1283 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1284 y->XtoY = X; 1285 } 1286 bs2 = bs*bs; 1287 for (i=0; i<x->s_nz; i++) { 1288 j = 0; 1289 while (j < bs2){ 1290 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1291 j++; 1292 } 1293 } 1294 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1295 } else { 1296 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 /* -------------------------------------------------------------------*/ 1302 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1303 MatGetRow_SeqSBAIJ, 1304 MatRestoreRow_SeqSBAIJ, 1305 MatMult_SeqSBAIJ_N, 1306 MatMultAdd_SeqSBAIJ_N, 1307 MatMultTranspose_SeqSBAIJ, 1308 MatMultTransposeAdd_SeqSBAIJ, 1309 MatSolve_SeqSBAIJ_N, 1310 0, 1311 0, 1312 0, 1313 0, 1314 MatCholeskyFactor_SeqSBAIJ, 1315 MatRelax_SeqSBAIJ, 1316 MatTranspose_SeqSBAIJ, 1317 MatGetInfo_SeqSBAIJ, 1318 MatEqual_SeqSBAIJ, 1319 MatGetDiagonal_SeqSBAIJ, 1320 MatDiagonalScale_SeqSBAIJ, 1321 MatNorm_SeqSBAIJ, 1322 0, 1323 MatAssemblyEnd_SeqSBAIJ, 1324 0, 1325 MatSetOption_SeqSBAIJ, 1326 MatZeroEntries_SeqSBAIJ, 1327 MatZeroRows_SeqSBAIJ, 1328 0, 1329 0, 1330 MatCholeskyFactorSymbolic_SeqSBAIJ, 1331 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1332 MatSetUpPreallocation_SeqSBAIJ, 1333 0, 1334 MatICCFactorSymbolic_SeqSBAIJ, 1335 MatGetArray_SeqSBAIJ, 1336 MatRestoreArray_SeqSBAIJ, 1337 MatDuplicate_SeqSBAIJ, 1338 0, 1339 0, 1340 0, 1341 0, 1342 MatAXPY_SeqSBAIJ, 1343 MatGetSubMatrices_SeqSBAIJ, 1344 MatIncreaseOverlap_SeqSBAIJ, 1345 MatGetValues_SeqSBAIJ, 1346 0, 1347 MatPrintHelp_SeqSBAIJ, 1348 MatScale_SeqSBAIJ, 1349 0, 1350 0, 1351 0, 1352 MatGetBlockSize_SeqSBAIJ, 1353 MatGetRowIJ_SeqSBAIJ, 1354 MatRestoreRowIJ_SeqSBAIJ, 1355 0, 1356 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 MatSetValuesBlocked_SeqSBAIJ, 1362 MatGetSubMatrix_SeqSBAIJ, 1363 0, 1364 0, 1365 MatGetPetscMaps_Petsc, 1366 0, 1367 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 MatGetRowMax_SeqSBAIJ, 1373 0, 1374 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 #if !defined(PETSC_USE_COMPLEX) 1387 MatGetInertia_SeqSBAIJ 1388 #else 1389 0 1390 #endif 1391 }; 1392 1393 EXTERN_C_BEGIN 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1396 int MatStoreValues_SeqSBAIJ(Mat mat) 1397 { 1398 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1399 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1400 int ierr; 1401 1402 PetscFunctionBegin; 1403 if (aij->nonew != 1) { 1404 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1405 } 1406 1407 /* allocate space for values if not already there */ 1408 if (!aij->saved_values) { 1409 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1410 } 1411 1412 /* copy values over */ 1413 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 EXTERN_C_END 1417 1418 EXTERN_C_BEGIN 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1421 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1422 { 1423 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1424 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1425 1426 PetscFunctionBegin; 1427 if (aij->nonew != 1) { 1428 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1429 } 1430 if (!aij->saved_values) { 1431 SETERRQ(1,"Must call MatStoreValues(A);first"); 1432 } 1433 1434 /* copy values over */ 1435 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 EXTERN_C_END 1439 1440 EXTERN_C_BEGIN 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1443 int MatCreate_SeqSBAIJ(Mat B) 1444 { 1445 Mat_SeqSBAIJ *b; 1446 int ierr,size; 1447 1448 PetscFunctionBegin; 1449 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1450 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1451 B->m = B->M = PetscMax(B->m,B->M); 1452 B->n = B->N = PetscMax(B->n,B->N); 1453 1454 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1455 B->data = (void*)b; 1456 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1457 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1458 B->ops->destroy = MatDestroy_SeqSBAIJ; 1459 B->ops->view = MatView_SeqSBAIJ; 1460 B->factor = 0; 1461 B->lupivotthreshold = 1.0; 1462 B->mapping = 0; 1463 b->row = 0; 1464 b->icol = 0; 1465 b->reallocs = 0; 1466 b->saved_values = 0; 1467 1468 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1469 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1470 1471 b->sorted = PETSC_FALSE; 1472 b->roworiented = PETSC_TRUE; 1473 b->nonew = 0; 1474 b->diag = 0; 1475 b->solve_work = 0; 1476 b->mult_work = 0; 1477 B->spptr = 0; 1478 b->keepzeroedrows = PETSC_FALSE; 1479 b->xtoy = 0; 1480 b->XtoY = 0; 1481 1482 b->inew = 0; 1483 b->jnew = 0; 1484 b->anew = 0; 1485 b->a2anew = 0; 1486 b->permute = PETSC_FALSE; 1487 1488 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1489 "MatStoreValues_SeqSBAIJ", 1490 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1491 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1492 "MatRetrieveValues_SeqSBAIJ", 1493 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1494 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1495 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1496 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 EXTERN_C_END 1500 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1503 /*@C 1504 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1505 compressed row) format. For good matrix assembly performance the 1506 user should preallocate the matrix storage by setting the parameter nz 1507 (or the array nnz). By setting these parameters accurately, performance 1508 during matrix assembly can be increased by more than a factor of 50. 1509 1510 Collective on Mat 1511 1512 Input Parameters: 1513 + A - the symmetric matrix 1514 . bs - size of block 1515 . nz - number of block nonzeros per block row (same for all rows) 1516 - nnz - array containing the number of block nonzeros in the upper triangular plus 1517 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1518 1519 Options Database Keys: 1520 . -mat_no_unroll - uses code that does not unroll the loops in the 1521 block calculations (much slower) 1522 . -mat_block_size - size of the blocks to use 1523 1524 Level: intermediate 1525 1526 Notes: 1527 Specify the preallocated storage with either nz or nnz (not both). 1528 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1529 allocation. For additional details, see the users manual chapter on 1530 matrices. 1531 1532 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1533 @*/ 1534 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1535 { 1536 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1537 int i,len,ierr,mbs,bs2; 1538 PetscTruth flg; 1539 int s_nz; 1540 1541 PetscFunctionBegin; 1542 B->preallocated = PETSC_TRUE; 1543 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1544 mbs = B->m/bs; 1545 bs2 = bs*bs; 1546 1547 if (mbs*bs != B->m) { 1548 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1549 } 1550 1551 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1552 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1553 if (nnz) { 1554 for (i=0; i<mbs; i++) { 1555 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1556 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1557 } 1558 } 1559 1560 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1561 if (!flg) { 1562 switch (bs) { 1563 case 1: 1564 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1565 B->ops->solve = MatSolve_SeqSBAIJ_1; 1566 B->ops->solves = MatSolves_SeqSBAIJ_1; 1567 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1568 B->ops->mult = MatMult_SeqSBAIJ_1; 1569 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1570 break; 1571 case 2: 1572 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1573 B->ops->solve = MatSolve_SeqSBAIJ_2; 1574 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1575 B->ops->mult = MatMult_SeqSBAIJ_2; 1576 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1577 break; 1578 case 3: 1579 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1580 B->ops->solve = MatSolve_SeqSBAIJ_3; 1581 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1582 B->ops->mult = MatMult_SeqSBAIJ_3; 1583 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1584 break; 1585 case 4: 1586 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1587 B->ops->solve = MatSolve_SeqSBAIJ_4; 1588 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1589 B->ops->mult = MatMult_SeqSBAIJ_4; 1590 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1591 break; 1592 case 5: 1593 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1594 B->ops->solve = MatSolve_SeqSBAIJ_5; 1595 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1596 B->ops->mult = MatMult_SeqSBAIJ_5; 1597 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1598 break; 1599 case 6: 1600 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1601 B->ops->solve = MatSolve_SeqSBAIJ_6; 1602 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1603 B->ops->mult = MatMult_SeqSBAIJ_6; 1604 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1605 break; 1606 case 7: 1607 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1608 B->ops->solve = MatSolve_SeqSBAIJ_7; 1609 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1610 B->ops->mult = MatMult_SeqSBAIJ_7; 1611 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1612 break; 1613 } 1614 } 1615 1616 b->mbs = mbs; 1617 b->nbs = mbs; 1618 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1619 if (!nnz) { 1620 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1621 else if (nz <= 0) nz = 1; 1622 for (i=0; i<mbs; i++) { 1623 b->imax[i] = nz; 1624 } 1625 nz = nz*mbs; /* total nz */ 1626 } else { 1627 nz = 0; 1628 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1629 } 1630 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1631 s_nz = nz; 1632 1633 /* allocate the matrix space */ 1634 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1635 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1636 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1637 b->j = (int*)(b->a + s_nz*bs2); 1638 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1639 b->i = b->j + s_nz; 1640 b->singlemalloc = PETSC_TRUE; 1641 1642 /* pointer to beginning of each row */ 1643 b->i[0] = 0; 1644 for (i=1; i<mbs+1; i++) { 1645 b->i[i] = b->i[i-1] + b->imax[i-1]; 1646 } 1647 1648 /* b->ilen will count nonzeros in each block row so far. */ 1649 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1650 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1651 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1652 1653 b->bs = bs; 1654 b->bs2 = bs2; 1655 b->s_nz = 0; 1656 b->s_maxnz = s_nz*bs2; 1657 1658 b->inew = 0; 1659 b->jnew = 0; 1660 b->anew = 0; 1661 b->a2anew = 0; 1662 b->permute = PETSC_FALSE; 1663 PetscFunctionReturn(0); 1664 } 1665 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatCreateSeqSBAIJ" 1669 /*@C 1670 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1671 compressed row) format. For good matrix assembly performance the 1672 user should preallocate the matrix storage by setting the parameter nz 1673 (or the array nnz). By setting these parameters accurately, performance 1674 during matrix assembly can be increased by more than a factor of 50. 1675 1676 Collective on MPI_Comm 1677 1678 Input Parameters: 1679 + comm - MPI communicator, set to PETSC_COMM_SELF 1680 . bs - size of block 1681 . m - number of rows, or number of columns 1682 . nz - number of block nonzeros per block row (same for all rows) 1683 - nnz - array containing the number of block nonzeros in the upper triangular plus 1684 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1685 1686 Output Parameter: 1687 . A - the symmetric matrix 1688 1689 Options Database Keys: 1690 . -mat_no_unroll - uses code that does not unroll the loops in the 1691 block calculations (much slower) 1692 . -mat_block_size - size of the blocks to use 1693 1694 Level: intermediate 1695 1696 Notes: 1697 1698 Specify the preallocated storage with either nz or nnz (not both). 1699 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1700 allocation. For additional details, see the users manual chapter on 1701 matrices. 1702 1703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1704 @*/ 1705 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1706 { 1707 int ierr; 1708 1709 PetscFunctionBegin; 1710 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1711 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1712 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1718 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1719 { 1720 Mat C; 1721 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1722 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1723 1724 PetscFunctionBegin; 1725 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1726 1727 *B = 0; 1728 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1729 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1730 c = (Mat_SeqSBAIJ*)C->data; 1731 1732 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1733 C->preallocated = PETSC_TRUE; 1734 C->factor = A->factor; 1735 c->row = 0; 1736 c->icol = 0; 1737 c->saved_values = 0; 1738 c->keepzeroedrows = a->keepzeroedrows; 1739 C->assembled = PETSC_TRUE; 1740 1741 c->bs = a->bs; 1742 c->bs2 = a->bs2; 1743 c->mbs = a->mbs; 1744 c->nbs = a->nbs; 1745 1746 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1747 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1748 for (i=0; i<mbs; i++) { 1749 c->imax[i] = a->imax[i]; 1750 c->ilen[i] = a->ilen[i]; 1751 } 1752 1753 /* allocate the matrix space */ 1754 c->singlemalloc = PETSC_TRUE; 1755 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1756 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1757 c->j = (int*)(c->a + nz*bs2); 1758 c->i = c->j + nz; 1759 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1760 if (mbs > 0) { 1761 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1762 if (cpvalues == MAT_COPY_VALUES) { 1763 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1764 } else { 1765 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1766 } 1767 } 1768 1769 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1770 c->sorted = a->sorted; 1771 c->roworiented = a->roworiented; 1772 c->nonew = a->nonew; 1773 1774 if (a->diag) { 1775 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1776 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1777 for (i=0; i<mbs; i++) { 1778 c->diag[i] = a->diag[i]; 1779 } 1780 } else c->diag = 0; 1781 c->s_nz = a->s_nz; 1782 c->s_maxnz = a->s_maxnz; 1783 c->solve_work = 0; 1784 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1785 c->mult_work = 0; 1786 *B = C; 1787 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1788 PetscFunctionReturn(0); 1789 } 1790 1791 EXTERN_C_BEGIN 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1794 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1795 { 1796 Mat_SeqSBAIJ *a; 1797 Mat B; 1798 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1799 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1800 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1801 int *masked,nmask,tmp,bs2,ishift; 1802 PetscScalar *aa; 1803 MPI_Comm comm = ((PetscObject)viewer)->comm; 1804 #if defined(PETSC_HAVE_SPOOLES) 1805 PetscTruth flag; 1806 #endif 1807 1808 PetscFunctionBegin; 1809 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1810 bs2 = bs*bs; 1811 1812 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1813 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1814 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1815 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1816 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1817 M = header[1]; N = header[2]; nz = header[3]; 1818 1819 if (header[3] < 0) { 1820 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1821 } 1822 1823 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1824 1825 /* 1826 This code adds extra rows to make sure the number of rows is 1827 divisible by the blocksize 1828 */ 1829 mbs = M/bs; 1830 extra_rows = bs - M + bs*(mbs); 1831 if (extra_rows == bs) extra_rows = 0; 1832 else mbs++; 1833 if (extra_rows) { 1834 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1835 } 1836 1837 /* read in row lengths */ 1838 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1839 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1840 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1841 1842 /* read in column indices */ 1843 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1844 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1845 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1846 1847 /* loop over row lengths determining block row lengths */ 1848 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1849 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1850 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1851 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1852 masked = mask + mbs; 1853 rowcount = 0; nzcount = 0; 1854 for (i=0; i<mbs; i++) { 1855 nmask = 0; 1856 for (j=0; j<bs; j++) { 1857 kmax = rowlengths[rowcount]; 1858 for (k=0; k<kmax; k++) { 1859 tmp = jj[nzcount++]/bs; /* block col. index */ 1860 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1861 } 1862 rowcount++; 1863 } 1864 s_browlengths[i] += nmask; 1865 1866 /* zero out the mask elements we set */ 1867 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1868 } 1869 1870 /* create our matrix */ 1871 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1872 B = *A; 1873 a = (Mat_SeqSBAIJ*)B->data; 1874 1875 /* set matrix "i" values */ 1876 a->i[0] = 0; 1877 for (i=1; i<= mbs; i++) { 1878 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1879 a->ilen[i-1] = s_browlengths[i-1]; 1880 } 1881 a->s_nz = a->i[mbs]; 1882 1883 /* read in nonzero values */ 1884 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1885 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1886 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1887 1888 /* set "a" and "j" values into matrix */ 1889 nzcount = 0; jcount = 0; 1890 for (i=0; i<mbs; i++) { 1891 nzcountb = nzcount; 1892 nmask = 0; 1893 for (j=0; j<bs; j++) { 1894 kmax = rowlengths[i*bs+j]; 1895 for (k=0; k<kmax; k++) { 1896 tmp = jj[nzcount++]/bs; /* block col. index */ 1897 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1898 } 1899 } 1900 /* sort the masked values */ 1901 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1902 1903 /* set "j" values into matrix */ 1904 maskcount = 1; 1905 for (j=0; j<nmask; j++) { 1906 a->j[jcount++] = masked[j]; 1907 mask[masked[j]] = maskcount++; 1908 } 1909 1910 /* set "a" values into matrix */ 1911 ishift = bs2*a->i[i]; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcountb]/bs ; /* block col. index */ 1916 if (tmp >= i){ 1917 block = mask[tmp] - 1; 1918 point = jj[nzcountb] - bs*tmp; 1919 idx = ishift + bs2*block + j + bs*point; 1920 a->a[idx] = aa[nzcountb]; 1921 } 1922 nzcountb++; 1923 } 1924 } 1925 /* zero out the mask elements we set */ 1926 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1927 } 1928 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1929 1930 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1931 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1932 ierr = PetscFree(aa);CHKERRQ(ierr); 1933 ierr = PetscFree(jj);CHKERRQ(ierr); 1934 ierr = PetscFree(mask);CHKERRQ(ierr); 1935 1936 B->assembled = PETSC_TRUE; 1937 #if defined(PETSC_HAVE_SPOOLES) 1938 ierr = PetscOptionsHasName(B->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 1939 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(B);CHKERRQ(ierr); } 1940 #endif 1941 ierr = MatView_Private(B);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 EXTERN_C_END 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1948 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1949 { 1950 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1951 MatScalar *aa=a->a,*v,*v1; 1952 PetscScalar *x,*b,*t,sum,d; 1953 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1954 int nz,nz1,*vj,*vj1,i; 1955 1956 PetscFunctionBegin; 1957 its = its*lits; 1958 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1959 1960 if (bs > 1) 1961 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1962 1963 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1964 if (xx != bb) { 1965 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1966 } else { 1967 b = x; 1968 } 1969 1970 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1971 1972 if (flag & SOR_ZERO_INITIAL_GUESS) { 1973 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1974 for (i=0; i<m; i++) 1975 t[i] = b[i]; 1976 1977 for (i=0; i<m; i++){ 1978 d = *(aa + ai[i]); /* diag[i] */ 1979 v = aa + ai[i] + 1; 1980 vj = aj + ai[i] + 1; 1981 nz = ai[i+1] - ai[i] - 1; 1982 x[i] = omega*t[i]/d; 1983 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1984 PetscLogFlops(2*nz-1); 1985 } 1986 } 1987 1988 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1989 for (i=0; i<m; i++) 1990 t[i] = b[i]; 1991 1992 for (i=0; i<m-1; i++){ /* update rhs */ 1993 v = aa + ai[i] + 1; 1994 vj = aj + ai[i] + 1; 1995 nz = ai[i+1] - ai[i] - 1; 1996 while (nz--) t[*vj++] -= x[i]*(*v++); 1997 PetscLogFlops(2*nz-1); 1998 } 1999 for (i=m-1; i>=0; i--){ 2000 d = *(aa + ai[i]); 2001 v = aa + ai[i] + 1; 2002 vj = aj + ai[i] + 1; 2003 nz = ai[i+1] - ai[i] - 1; 2004 sum = t[i]; 2005 while (nz--) sum -= x[*vj++]*(*v++); 2006 PetscLogFlops(2*nz-1); 2007 x[i] = (1-omega)*x[i] + omega*sum/d; 2008 } 2009 } 2010 its--; 2011 } 2012 2013 while (its--) { 2014 /* 2015 forward sweep: 2016 for i=0,...,m-1: 2017 sum[i] = (b[i] - U(i,:)x )/d[i]; 2018 x[i] = (1-omega)x[i] + omega*sum[i]; 2019 b = b - x[i]*U^T(i,:); 2020 2021 */ 2022 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2023 for (i=0; i<m; i++) 2024 t[i] = b[i]; 2025 2026 for (i=0; i<m; i++){ 2027 d = *(aa + ai[i]); /* diag[i] */ 2028 v = aa + ai[i] + 1; v1=v; 2029 vj = aj + ai[i] + 1; vj1=vj; 2030 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2031 sum = t[i]; 2032 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2033 x[i] = (1-omega)*x[i] + omega*sum/d; 2034 while (nz--) t[*vj++] -= x[i]*(*v++); 2035 PetscLogFlops(4*nz-2); 2036 } 2037 } 2038 2039 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2040 /* 2041 backward sweep: 2042 b = b - x[i]*U^T(i,:), i=0,...,n-2 2043 for i=m-1,...,0: 2044 sum[i] = (b[i] - U(i,:)x )/d[i]; 2045 x[i] = (1-omega)x[i] + omega*sum[i]; 2046 */ 2047 for (i=0; i<m; i++) 2048 t[i] = b[i]; 2049 2050 for (i=0; i<m-1; i++){ /* update rhs */ 2051 v = aa + ai[i] + 1; 2052 vj = aj + ai[i] + 1; 2053 nz = ai[i+1] - ai[i] - 1; 2054 while (nz--) t[*vj++] -= x[i]*(*v++); 2055 PetscLogFlops(2*nz-1); 2056 } 2057 for (i=m-1; i>=0; i--){ 2058 d = *(aa + ai[i]); 2059 v = aa + ai[i] + 1; 2060 vj = aj + ai[i] + 1; 2061 nz = ai[i+1] - ai[i] - 1; 2062 sum = t[i]; 2063 while (nz--) sum -= x[*vj++]*(*v++); 2064 PetscLogFlops(2*nz-1); 2065 x[i] = (1-omega)*x[i] + omega*sum/d; 2066 } 2067 } 2068 } 2069 2070 ierr = PetscFree(t); CHKERRQ(ierr); 2071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2072 if (bb != xx) { 2073 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2074 } 2075 2076 PetscFunctionReturn(0); 2077 } 2078 2079 2080 2081 2082 2083 2084