1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_NOT_SYMMETRIC: 183 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 184 case MAT_HERMITIAN: 185 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 186 case MAT_SYMMETRIC: 187 case MAT_STRUCTURALLY_SYMMETRIC: 188 case MAT_NOT_HERMITIAN: 189 case MAT_SYMMETRY_ETERNAL: 190 case MAT_NOT_SYMMETRY_ETERNAL: 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 204 MatScalar *aa,*aa_i; 205 PetscScalar *v_i; 206 207 PetscFunctionBegin; 208 bs = a->bs; 209 ai = a->i; 210 aj = a->j; 211 aa = a->a; 212 bs2 = a->bs2; 213 214 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 215 216 bn = row/bs; /* Block number */ 217 bp = row % bs; /* Block position */ 218 M = ai[bn+1] - ai[bn]; 219 *ncols = bs*M; 220 221 if (v) { 222 *v = 0; 223 if (*ncols) { 224 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 225 for (i=0; i<M; i++) { /* for each block in the block row */ 226 v_i = *v + i*bs; 227 aa_i = aa + bs2*(ai[bn] + i); 228 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 229 } 230 } 231 } 232 233 if (cols) { 234 *cols = 0; 235 if (*ncols) { 236 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 237 for (i=0; i<M; i++) { /* for each block in the block row */ 238 cols_i = *cols + i*bs; 239 itmp = bs*aj[ai[bn] + i]; 240 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 241 } 242 } 243 } 244 245 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 246 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 247 #ifdef column_search 248 v_i = *v + M*bs; 249 cols_i = *cols + M*bs; 250 for (i=0; i<bn; i++){ /* for each block row */ 251 M = ai[i+1] - ai[i]; 252 for (j=0; j<M; j++){ 253 itmp = aj[ai[i] + j]; /* block column value */ 254 if (itmp == bn){ 255 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 256 for (k=0; k<bs; k++) { 257 *cols_i++ = i*bs+k; 258 *v_i++ = aa_i[k]; 259 } 260 *ncols += bs; 261 break; 262 } 263 } 264 } 265 #endif 266 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 273 { 274 int ierr; 275 276 PetscFunctionBegin; 277 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 278 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 285 { 286 int ierr; 287 PetscFunctionBegin; 288 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 294 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 295 { 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 297 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 298 PetscScalar *aa; 299 FILE *file; 300 301 PetscFunctionBegin; 302 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 303 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 304 col_lens[0] = MAT_FILE_COOKIE; 305 306 col_lens[1] = A->m; 307 col_lens[2] = A->m; 308 col_lens[3] = a->s_nz*bs2; 309 310 /* store lengths of each row and write (including header) to file */ 311 count = 0; 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 315 } 316 } 317 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 318 ierr = PetscFree(col_lens);CHKERRQ(ierr); 319 320 /* store column indices (zero start index) */ 321 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 322 count = 0; 323 for (i=0; i<a->mbs; i++) { 324 for (j=0; j<bs; j++) { 325 for (k=a->i[i]; k<a->i[i+1]; k++) { 326 for (l=0; l<bs; l++) { 327 jj[count++] = bs*a->j[k] + l; 328 } 329 } 330 } 331 } 332 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 333 ierr = PetscFree(jj);CHKERRQ(ierr); 334 335 /* store nonzero values */ 336 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 337 count = 0; 338 for (i=0; i<a->mbs; i++) { 339 for (j=0; j<bs; j++) { 340 for (k=a->i[i]; k<a->i[i+1]; k++) { 341 for (l=0; l<bs; l++) { 342 aa[count++] = a->a[bs2*k + l*bs + j]; 343 } 344 } 345 } 346 } 347 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 348 ierr = PetscFree(aa);CHKERRQ(ierr); 349 350 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 351 if (file) { 352 fprintf(file,"-matload_block_size %d\n",a->bs); 353 } 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 359 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 360 { 361 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 362 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 363 char *name; 364 PetscViewerFormat format; 365 366 PetscFunctionBegin; 367 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 368 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 369 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 370 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 371 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 372 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 373 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 374 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 375 for (i=0; i<a->mbs; i++) { 376 for (j=0; j<bs; j++) { 377 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 378 for (k=a->i[i]; k<a->i[i+1]; k++) { 379 for (l=0; l<bs; l++) { 380 #if defined(PETSC_USE_COMPLEX) 381 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 383 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 384 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 385 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 386 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 387 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 388 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } 390 #else 391 if (a->a[bs2*k + l*bs + j] != 0.0) { 392 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 393 } 394 #endif 395 } 396 } 397 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 398 } 399 } 400 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 401 } else { 402 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 403 for (i=0; i<a->mbs; i++) { 404 for (j=0; j<bs; j++) { 405 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 406 for (k=a->i[i]; k<a->i[i+1]; k++) { 407 for (l=0; l<bs; l++) { 408 #if defined(PETSC_USE_COMPLEX) 409 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 410 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 411 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 412 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 413 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 414 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 415 } else { 416 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 420 #endif 421 } 422 } 423 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 424 } 425 } 426 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 427 } 428 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 434 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 435 { 436 Mat A = (Mat) Aa; 437 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 438 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 439 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 440 MatScalar *aa; 441 MPI_Comm comm; 442 PetscViewer viewer; 443 444 PetscFunctionBegin; 445 /* 446 This is nasty. If this is called from an originally parallel matrix 447 then all processes call this,but only the first has the matrix so the 448 rest should return immediately. 449 */ 450 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 451 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 452 if (rank) PetscFunctionReturn(0); 453 454 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 455 456 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 457 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 458 459 /* loop over matrix elements drawing boxes */ 460 color = PETSC_DRAW_BLUE; 461 for (i=0,row=0; i<mbs; i++,row+=bs) { 462 for (j=a->i[i]; j<a->i[i+1]; j++) { 463 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 464 x_l = a->j[j]*bs; x_r = x_l + 1.0; 465 aa = a->a + j*bs2; 466 for (k=0; k<bs; k++) { 467 for (l=0; l<bs; l++) { 468 if (PetscRealPart(*aa++) >= 0.) continue; 469 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 470 } 471 } 472 } 473 } 474 color = PETSC_DRAW_CYAN; 475 for (i=0,row=0; i<mbs; i++,row+=bs) { 476 for (j=a->i[i]; j<a->i[i+1]; j++) { 477 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 478 x_l = a->j[j]*bs; x_r = x_l + 1.0; 479 aa = a->a + j*bs2; 480 for (k=0; k<bs; k++) { 481 for (l=0; l<bs; l++) { 482 if (PetscRealPart(*aa++) != 0.) continue; 483 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 484 } 485 } 486 } 487 } 488 489 color = PETSC_DRAW_RED; 490 for (i=0,row=0; i<mbs; i++,row+=bs) { 491 for (j=a->i[i]; j<a->i[i+1]; j++) { 492 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 493 x_l = a->j[j]*bs; x_r = x_l + 1.0; 494 aa = a->a + j*bs2; 495 for (k=0; k<bs; k++) { 496 for (l=0; l<bs; l++) { 497 if (PetscRealPart(*aa++) <= 0.) continue; 498 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 499 } 500 } 501 } 502 } 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 508 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 509 { 510 int ierr; 511 PetscReal xl,yl,xr,yr,w,h; 512 PetscDraw draw; 513 PetscTruth isnull; 514 515 PetscFunctionBegin; 516 517 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 518 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 519 520 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 521 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 522 xr += w; yr += h; xl = -w; yl = -h; 523 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 524 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 525 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "MatView_SeqSBAIJ" 531 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 532 { 533 int ierr; 534 PetscTruth issocket,isascii,isbinary,isdraw; 535 536 PetscFunctionBegin; 537 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 538 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 539 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 540 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 541 if (issocket) { 542 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 543 } else if (isascii){ 544 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 545 } else if (isbinary) { 546 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 547 } else if (isdraw) { 548 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 549 } else { 550 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 551 } 552 PetscFunctionReturn(0); 553 } 554 555 556 #undef __FUNCT__ 557 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 558 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 559 { 560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 561 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 562 int *ai = a->i,*ailen = a->ilen; 563 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 564 MatScalar *ap,*aa = a->a,zero = 0.0; 565 566 PetscFunctionBegin; 567 for (k=0; k<m; k++) { /* loop over rows */ 568 row = im[k]; brow = row/bs; 569 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 570 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 571 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 572 nrow = ailen[brow]; 573 for (l=0; l<n; l++) { /* loop over columns */ 574 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 575 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 576 col = in[l] ; 577 bcol = col/bs; 578 cidx = col%bs; 579 ridx = row%bs; 580 high = nrow; 581 low = 0; /* assume unsorted */ 582 while (high-low > 5) { 583 t = (low+high)/2; 584 if (rp[t] > bcol) high = t; 585 else low = t; 586 } 587 for (i=low; i<high; i++) { 588 if (rp[i] > bcol) break; 589 if (rp[i] == bcol) { 590 *v++ = ap[bs2*i+bs*cidx+ridx]; 591 goto finished; 592 } 593 } 594 *v++ = zero; 595 finished:; 596 } 597 } 598 PetscFunctionReturn(0); 599 } 600 601 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 604 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 605 { 606 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 607 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 608 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 609 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 610 PetscTruth roworiented=a->roworiented; 611 const MatScalar *value = v; 612 MatScalar *ap,*aa = a->a,*bap; 613 614 PetscFunctionBegin; 615 if (roworiented) { 616 stepval = (n-1)*bs; 617 } else { 618 stepval = (m-1)*bs; 619 } 620 for (k=0; k<m; k++) { /* loop over added rows */ 621 row = im[k]; 622 if (row < 0) continue; 623 #if defined(PETSC_USE_BOPT_g) 624 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 625 #endif 626 rp = aj + ai[row]; 627 ap = aa + bs2*ai[row]; 628 rmax = imax[row]; 629 nrow = ailen[row]; 630 low = 0; 631 for (l=0; l<n; l++) { /* loop over added columns */ 632 if (in[l] < 0) continue; 633 col = in[l]; 634 #if defined(PETSC_USE_BOPT_g) 635 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 636 #endif 637 if (col < row) continue; /* ignore lower triangular block */ 638 if (roworiented) { 639 value = v + k*(stepval+bs)*bs + l*bs; 640 } else { 641 value = v + l*(stepval+bs)*bs + k*bs; 642 } 643 if (!sorted) low = 0; high = nrow; 644 while (high-low > 7) { 645 t = (low+high)/2; 646 if (rp[t] > col) high = t; 647 else low = t; 648 } 649 for (i=low; i<high; i++) { 650 if (rp[i] > col) break; 651 if (rp[i] == col) { 652 bap = ap + bs2*i; 653 if (roworiented) { 654 if (is == ADD_VALUES) { 655 for (ii=0; ii<bs; ii++,value+=stepval) { 656 for (jj=ii; jj<bs2; jj+=bs) { 657 bap[jj] += *value++; 658 } 659 } 660 } else { 661 for (ii=0; ii<bs; ii++,value+=stepval) { 662 for (jj=ii; jj<bs2; jj+=bs) { 663 bap[jj] = *value++; 664 } 665 } 666 } 667 } else { 668 if (is == ADD_VALUES) { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=0; jj<bs; jj++) { 671 *bap++ += *value++; 672 } 673 } 674 } else { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=0; jj<bs; jj++) { 677 *bap++ = *value++; 678 } 679 } 680 } 681 } 682 goto noinsert2; 683 } 684 } 685 if (nonew == 1) goto noinsert2; 686 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 687 if (nrow >= rmax) { 688 /* there is no extra room in row, therefore enlarge */ 689 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 690 MatScalar *new_a; 691 692 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 693 694 /* malloc new storage space */ 695 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 696 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 697 new_j = (int*)(new_a + bs2*new_nz); 698 new_i = new_j + new_nz; 699 700 /* copy over old data into new slots */ 701 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 702 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 703 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 704 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 705 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 706 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 707 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 708 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 709 /* free up old matrix storage */ 710 ierr = PetscFree(a->a);CHKERRQ(ierr); 711 if (!a->singlemalloc) { 712 ierr = PetscFree(a->i);CHKERRQ(ierr); 713 ierr = PetscFree(a->j);CHKERRQ(ierr); 714 } 715 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 716 a->singlemalloc = PETSC_TRUE; 717 718 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 719 rmax = imax[row] = imax[row] + CHUNKSIZE; 720 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 721 a->s_maxnz += bs2*CHUNKSIZE; 722 a->reallocs++; 723 a->s_nz++; 724 } 725 N = nrow++ - 1; 726 /* shift up all the later entries in this row */ 727 for (ii=N; ii>=i; ii--) { 728 rp[ii+1] = rp[ii]; 729 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 730 } 731 if (N >= i) { 732 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 733 } 734 rp[i] = col; 735 bap = ap + bs2*i; 736 if (roworiented) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] = *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=0; jj<bs; jj++) { 745 *bap++ = *value++; 746 } 747 } 748 } 749 noinsert2:; 750 low = i; 751 } 752 ailen[row] = nrow; 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 759 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 760 { 761 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 762 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 763 int m = A->m,*ip,N,*ailen = a->ilen; 764 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 765 MatScalar *aa = a->a,*ap; 766 767 PetscFunctionBegin; 768 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 769 770 if (m) rmax = ailen[0]; 771 for (i=1; i<mbs; i++) { 772 /* move each row back by the amount of empty slots (fshift) before it*/ 773 fshift += imax[i-1] - ailen[i-1]; 774 rmax = PetscMax(rmax,ailen[i]); 775 if (fshift) { 776 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 777 N = ailen[i]; 778 for (j=0; j<N; j++) { 779 ip[j-fshift] = ip[j]; 780 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 781 } 782 } 783 ai[i] = ai[i-1] + ailen[i-1]; 784 } 785 if (mbs) { 786 fshift += imax[mbs-1] - ailen[mbs-1]; 787 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 788 } 789 /* reset ilen and imax for each row */ 790 for (i=0; i<mbs; i++) { 791 ailen[i] = imax[i] = ai[i+1] - ai[i]; 792 } 793 a->s_nz = ai[mbs]; 794 795 /* diagonals may have moved, reset it */ 796 if (a->diag) { 797 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 798 } 799 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 800 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 801 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 802 a->reallocs); 803 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 804 a->reallocs = 0; 805 A->info.nz_unneeded = (PetscReal)fshift*bs2; 806 807 PetscFunctionReturn(0); 808 } 809 810 /* 811 This function returns an array of flags which indicate the locations of contiguous 812 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 813 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 814 Assume: sizes should be long enough to hold all the values. 815 */ 816 #undef __FUNCT__ 817 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 818 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 819 { 820 int i,j,k,row; 821 PetscTruth flg; 822 823 PetscFunctionBegin; 824 for (i=0,j=0; i<n; j++) { 825 row = idx[i]; 826 if (row%bs!=0) { /* Not the begining of a block */ 827 sizes[j] = 1; 828 i++; 829 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 830 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 831 i++; 832 } else { /* Begining of the block, so check if the complete block exists */ 833 flg = PETSC_TRUE; 834 for (k=1; k<bs; k++) { 835 if (row+k != idx[i+k]) { /* break in the block */ 836 flg = PETSC_FALSE; 837 break; 838 } 839 } 840 if (flg == PETSC_TRUE) { /* No break in the bs */ 841 sizes[j] = bs; 842 i+= bs; 843 } else { 844 sizes[j] = 1; 845 i++; 846 } 847 } 848 } 849 *bs_max = j; 850 PetscFunctionReturn(0); 851 } 852 853 #undef __FUNCT__ 854 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 855 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 856 { 857 PetscFunctionBegin; 858 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 859 } 860 861 /* Only add/insert a(i,j) with i<=j (blocks). 862 Any a(i,j) with i>j input by user is ingored. 863 */ 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 867 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 868 { 869 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 870 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 871 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 872 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 873 int ridx,cidx,bs2=a->bs2,ierr; 874 MatScalar *ap,value,*aa=a->a,*bap; 875 876 PetscFunctionBegin; 877 878 for (k=0; k<m; k++) { /* loop over added rows */ 879 row = im[k]; /* row number */ 880 brow = row/bs; /* block row number */ 881 if (row < 0) continue; 882 #if defined(PETSC_USE_BOPT_g) 883 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 884 #endif 885 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 886 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 887 rmax = imax[brow]; /* maximum space allocated for this row */ 888 nrow = ailen[brow]; /* actual length of this row */ 889 low = 0; 890 891 for (l=0; l<n; l++) { /* loop over added columns */ 892 if (in[l] < 0) continue; 893 #if defined(PETSC_USE_BOPT_g) 894 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 895 #endif 896 col = in[l]; 897 bcol = col/bs; /* block col number */ 898 899 if (brow <= bcol){ 900 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 901 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 902 /* element value a(k,l) */ 903 if (roworiented) { 904 value = v[l + k*n]; 905 } else { 906 value = v[k + l*m]; 907 } 908 909 /* move pointer bap to a(k,l) quickly and add/insert value */ 910 if (!sorted) low = 0; high = nrow; 911 while (high-low > 7) { 912 t = (low+high)/2; 913 if (rp[t] > bcol) high = t; 914 else low = t; 915 } 916 for (i=low; i<high; i++) { 917 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 918 if (rp[i] > bcol) break; 919 if (rp[i] == bcol) { 920 bap = ap + bs2*i + bs*cidx + ridx; 921 if (is == ADD_VALUES) *bap += value; 922 else *bap = value; 923 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 924 if (brow == bcol && ridx < cidx){ 925 bap = ap + bs2*i + bs*ridx + cidx; 926 if (is == ADD_VALUES) *bap += value; 927 else *bap = value; 928 } 929 goto noinsert1; 930 } 931 } 932 933 if (nonew == 1) goto noinsert1; 934 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 935 if (nrow >= rmax) { 936 /* there is no extra room in row, therefore enlarge */ 937 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 938 MatScalar *new_a; 939 940 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 941 942 /* Malloc new storage space */ 943 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 944 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 945 new_j = (int*)(new_a + bs2*new_nz); 946 new_i = new_j + new_nz; 947 948 /* copy over old data into new slots */ 949 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 950 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 951 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 952 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 953 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 954 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 955 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 957 /* free up old matrix storage */ 958 ierr = PetscFree(a->a);CHKERRQ(ierr); 959 if (!a->singlemalloc) { 960 ierr = PetscFree(a->i);CHKERRQ(ierr); 961 ierr = PetscFree(a->j);CHKERRQ(ierr); 962 } 963 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 964 a->singlemalloc = PETSC_TRUE; 965 966 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 967 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 968 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 969 a->s_maxnz += bs2*CHUNKSIZE; 970 a->reallocs++; 971 a->s_nz++; 972 } 973 974 N = nrow++ - 1; 975 /* shift up all the later entries in this row */ 976 for (ii=N; ii>=i; ii--) { 977 rp[ii+1] = rp[ii]; 978 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 979 } 980 if (N>=i) { 981 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 982 } 983 rp[i] = bcol; 984 ap[bs2*i + bs*cidx + ridx] = value; 985 noinsert1:; 986 low = i; 987 /* } */ 988 } 989 } /* end of if .. if.. */ 990 } /* end of loop over added columns */ 991 ailen[brow] = nrow; 992 } /* end of loop over added rows */ 993 994 PetscFunctionReturn(0); 995 } 996 997 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 998 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 999 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 1000 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1001 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 1002 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1003 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1004 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 1005 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1006 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1007 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1008 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1009 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1010 extern int MatZeroEntries_SeqSBAIJ(Mat); 1011 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1012 1013 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1014 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1019 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1020 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1021 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1026 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1027 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1028 1029 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1030 1031 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1037 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1038 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1039 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1047 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1048 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1049 1050 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1056 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1057 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1058 1059 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1065 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1066 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1067 1068 #ifdef HAVE_MatICCFactor 1069 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1072 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1073 { 1074 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1075 Mat outA; 1076 int ierr; 1077 PetscTruth row_identity,col_identity; 1078 1079 PetscFunctionBegin; 1080 /* 1081 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1082 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1083 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1084 if (!row_identity || !col_identity) { 1085 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1086 } 1087 */ 1088 1089 outA = inA; 1090 inA->factor = FACTOR_CHOLESKY; 1091 1092 if (!a->diag) { 1093 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1094 } 1095 /* 1096 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1097 for ILU(0) factorization with natural ordering 1098 */ 1099 switch (a->bs) { 1100 case 1: 1101 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1102 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1103 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1104 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1105 case 2: 1106 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1107 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1108 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1109 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1110 break; 1111 case 3: 1112 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1113 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1114 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1115 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1116 break; 1117 case 4: 1118 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1119 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1120 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1121 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1122 break; 1123 case 5: 1124 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1125 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1126 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1127 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1128 break; 1129 case 6: 1130 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1131 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1132 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1133 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1134 break; 1135 case 7: 1136 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1137 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1138 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1139 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1140 break; 1141 default: 1142 a->row = row; 1143 a->icol = col; 1144 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1145 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1146 1147 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1148 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1149 PetscLogObjectParent(inA,a->icol); 1150 1151 if (!a->solve_work) { 1152 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1153 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1154 } 1155 } 1156 1157 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1158 1159 PetscFunctionReturn(0); 1160 } 1161 #endif 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1165 int MatPrintHelp_SeqSBAIJ(Mat A) 1166 { 1167 static PetscTruth called = PETSC_FALSE; 1168 MPI_Comm comm = A->comm; 1169 int ierr; 1170 1171 PetscFunctionBegin; 1172 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1173 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1174 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 EXTERN_C_BEGIN 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1181 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1182 { 1183 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1184 int i,nz,n; 1185 1186 PetscFunctionBegin; 1187 nz = baij->s_maxnz; 1188 n = mat->n; 1189 for (i=0; i<nz; i++) { 1190 baij->j[i] = indices[i]; 1191 } 1192 baij->s_nz = nz; 1193 for (i=0; i<n; i++) { 1194 baij->ilen[i] = baij->imax[i]; 1195 } 1196 1197 PetscFunctionReturn(0); 1198 } 1199 EXTERN_C_END 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1203 /*@ 1204 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1205 in the matrix. 1206 1207 Input Parameters: 1208 + mat - the SeqSBAIJ matrix 1209 - indices - the column indices 1210 1211 Level: advanced 1212 1213 Notes: 1214 This can be called if you have precomputed the nonzero structure of the 1215 matrix and want to provide it to the matrix object to improve the performance 1216 of the MatSetValues() operation. 1217 1218 You MUST have set the correct numbers of nonzeros per row in the call to 1219 MatCreateSeqSBAIJ(). 1220 1221 MUST be called before any calls to MatSetValues() 1222 1223 .seealso: MatCreateSeqSBAIJ 1224 @*/ 1225 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1226 { 1227 int ierr,(*f)(Mat,int *); 1228 1229 PetscFunctionBegin; 1230 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1231 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1232 if (f) { 1233 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1234 } else { 1235 SETERRQ(1,"Wrong type of matrix to set column indices"); 1236 } 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1242 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1243 { 1244 int ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1253 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1254 { 1255 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1256 PetscFunctionBegin; 1257 *array = a->a; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1263 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1264 { 1265 PetscFunctionBegin; 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #include "petscblaslapack.h" 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1272 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1273 { 1274 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1275 int ierr,one=1,i,bs=y->bs,bs2,j; 1276 1277 PetscFunctionBegin; 1278 if (str == SAME_NONZERO_PATTERN) { 1279 BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1280 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1281 if (y->xtoy && y->XtoY != X) { 1282 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1283 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1284 } 1285 if (!y->xtoy) { /* get xtoy */ 1286 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1287 y->XtoY = X; 1288 } 1289 bs2 = bs*bs; 1290 for (i=0; i<x->s_nz; i++) { 1291 j = 0; 1292 while (j < bs2){ 1293 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1294 j++; 1295 } 1296 } 1297 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1298 } else { 1299 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1306 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1307 { 1308 PetscFunctionBegin; 1309 *flg = PETSC_TRUE; 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1315 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1316 { 1317 PetscFunctionBegin; 1318 *flg = PETSC_TRUE; 1319 PetscFunctionReturn(0); 1320 } 1321 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1324 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1325 { 1326 PetscFunctionBegin; 1327 *flg = PETSC_FALSE; 1328 PetscFunctionReturn(0); 1329 } 1330 1331 /* -------------------------------------------------------------------*/ 1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1333 MatGetRow_SeqSBAIJ, 1334 MatRestoreRow_SeqSBAIJ, 1335 MatMult_SeqSBAIJ_N, 1336 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1337 MatMultTranspose_SeqSBAIJ, 1338 MatMultTransposeAdd_SeqSBAIJ, 1339 MatSolve_SeqSBAIJ_N, 1340 0, 1341 0, 1342 /*10*/ 0, 1343 0, 1344 MatCholeskyFactor_SeqSBAIJ, 1345 MatRelax_SeqSBAIJ, 1346 MatTranspose_SeqSBAIJ, 1347 /*15*/ MatGetInfo_SeqSBAIJ, 1348 MatEqual_SeqSBAIJ, 1349 MatGetDiagonal_SeqSBAIJ, 1350 MatDiagonalScale_SeqSBAIJ, 1351 MatNorm_SeqSBAIJ, 1352 /*20*/ 0, 1353 MatAssemblyEnd_SeqSBAIJ, 1354 0, 1355 MatSetOption_SeqSBAIJ, 1356 MatZeroEntries_SeqSBAIJ, 1357 /*25*/ MatZeroRows_SeqSBAIJ, 1358 0, 1359 0, 1360 MatCholeskyFactorSymbolic_SeqSBAIJ, 1361 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1362 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1363 0, 1364 MatICCFactorSymbolic_SeqSBAIJ, 1365 MatGetArray_SeqSBAIJ, 1366 MatRestoreArray_SeqSBAIJ, 1367 /*35*/ MatDuplicate_SeqSBAIJ, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /*40*/ MatAXPY_SeqSBAIJ, 1373 MatGetSubMatrices_SeqSBAIJ, 1374 MatIncreaseOverlap_SeqSBAIJ, 1375 MatGetValues_SeqSBAIJ, 1376 0, 1377 /*45*/ MatPrintHelp_SeqSBAIJ, 1378 MatScale_SeqSBAIJ, 1379 0, 1380 0, 1381 0, 1382 /*50*/ MatGetBlockSize_SeqSBAIJ, 1383 MatGetRowIJ_SeqSBAIJ, 1384 MatRestoreRowIJ_SeqSBAIJ, 1385 0, 1386 0, 1387 /*55*/ 0, 1388 0, 1389 0, 1390 0, 1391 MatSetValuesBlocked_SeqSBAIJ, 1392 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1393 0, 1394 0, 1395 MatGetPetscMaps_Petsc, 1396 0, 1397 /*65*/ 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /*70*/ MatGetRowMax_SeqSBAIJ, 1403 0, 1404 0, 1405 0, 1406 0, 1407 /*75*/ 0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /*80*/ 0, 1413 0, 1414 0, 1415 0, 1416 #if !defined(PETSC_USE_COMPLEX) 1417 MatGetInertia_SeqSBAIJ, 1418 #else 1419 0, 1420 #endif 1421 /*85*/ MatLoad_SeqSBAIJ, 1422 MatIsSymmetric_SeqSBAIJ, 1423 MatIsStructurallySymmetric_SeqSBAIJ, 1424 MatIsHermitian_SeqSBAIJ 1425 }; 1426 1427 EXTERN_C_BEGIN 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1430 int MatStoreValues_SeqSBAIJ(Mat mat) 1431 { 1432 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1433 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1434 int ierr; 1435 1436 PetscFunctionBegin; 1437 if (aij->nonew != 1) { 1438 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1439 } 1440 1441 /* allocate space for values if not already there */ 1442 if (!aij->saved_values) { 1443 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1444 } 1445 1446 /* copy values over */ 1447 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 EXTERN_C_END 1451 1452 EXTERN_C_BEGIN 1453 #undef __FUNCT__ 1454 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1455 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1456 { 1457 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1458 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1459 1460 PetscFunctionBegin; 1461 if (aij->nonew != 1) { 1462 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1463 } 1464 if (!aij->saved_values) { 1465 SETERRQ(1,"Must call MatStoreValues(A);first"); 1466 } 1467 1468 /* copy values over */ 1469 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1470 PetscFunctionReturn(0); 1471 } 1472 EXTERN_C_END 1473 1474 EXTERN_C_BEGIN 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1477 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1478 { 1479 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1480 int i,len,ierr,mbs,bs2; 1481 PetscTruth flg; 1482 int s_nz; 1483 1484 PetscFunctionBegin; 1485 B->preallocated = PETSC_TRUE; 1486 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1487 mbs = B->m/bs; 1488 bs2 = bs*bs; 1489 1490 if (mbs*bs != B->m) { 1491 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1492 } 1493 1494 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1495 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1496 if (nnz) { 1497 for (i=0; i<mbs; i++) { 1498 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1499 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1500 } 1501 } 1502 1503 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1504 if (!flg) { 1505 switch (bs) { 1506 case 1: 1507 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1508 B->ops->solve = MatSolve_SeqSBAIJ_1; 1509 B->ops->solves = MatSolves_SeqSBAIJ_1; 1510 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1511 B->ops->mult = MatMult_SeqSBAIJ_1; 1512 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1513 break; 1514 case 2: 1515 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1516 B->ops->solve = MatSolve_SeqSBAIJ_2; 1517 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1518 B->ops->mult = MatMult_SeqSBAIJ_2; 1519 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1520 break; 1521 case 3: 1522 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1523 B->ops->solve = MatSolve_SeqSBAIJ_3; 1524 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1525 B->ops->mult = MatMult_SeqSBAIJ_3; 1526 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1527 break; 1528 case 4: 1529 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1530 B->ops->solve = MatSolve_SeqSBAIJ_4; 1531 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1532 B->ops->mult = MatMult_SeqSBAIJ_4; 1533 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1534 break; 1535 case 5: 1536 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1537 B->ops->solve = MatSolve_SeqSBAIJ_5; 1538 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1539 B->ops->mult = MatMult_SeqSBAIJ_5; 1540 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1541 break; 1542 case 6: 1543 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1544 B->ops->solve = MatSolve_SeqSBAIJ_6; 1545 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1546 B->ops->mult = MatMult_SeqSBAIJ_6; 1547 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1548 break; 1549 case 7: 1550 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1551 B->ops->solve = MatSolve_SeqSBAIJ_7; 1552 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1553 B->ops->mult = MatMult_SeqSBAIJ_7; 1554 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1555 break; 1556 } 1557 } 1558 1559 b->mbs = mbs; 1560 b->nbs = mbs; 1561 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1562 if (!nnz) { 1563 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1564 else if (nz <= 0) nz = 1; 1565 for (i=0; i<mbs; i++) { 1566 b->imax[i] = nz; 1567 } 1568 nz = nz*mbs; /* total nz */ 1569 } else { 1570 nz = 0; 1571 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1572 } 1573 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1574 s_nz = nz; 1575 1576 /* allocate the matrix space */ 1577 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1578 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1579 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1580 b->j = (int*)(b->a + s_nz*bs2); 1581 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1582 b->i = b->j + s_nz; 1583 b->singlemalloc = PETSC_TRUE; 1584 1585 /* pointer to beginning of each row */ 1586 b->i[0] = 0; 1587 for (i=1; i<mbs+1; i++) { 1588 b->i[i] = b->i[i-1] + b->imax[i-1]; 1589 } 1590 1591 /* b->ilen will count nonzeros in each block row so far. */ 1592 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1593 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1594 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1595 1596 b->bs = bs; 1597 b->bs2 = bs2; 1598 b->s_nz = 0; 1599 b->s_maxnz = s_nz*bs2; 1600 1601 b->inew = 0; 1602 b->jnew = 0; 1603 b->anew = 0; 1604 b->a2anew = 0; 1605 b->permute = PETSC_FALSE; 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 /*MC 1611 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1612 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1613 1614 Options Database Keys: 1615 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1616 1617 Level: beginner 1618 1619 .seealso: MatCreateSeqSBAIJ 1620 M*/ 1621 1622 EXTERN_C_BEGIN 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1625 int MatCreate_SeqSBAIJ(Mat B) 1626 { 1627 Mat_SeqSBAIJ *b; 1628 int ierr,size; 1629 1630 PetscFunctionBegin; 1631 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1632 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1633 B->m = B->M = PetscMax(B->m,B->M); 1634 B->n = B->N = PetscMax(B->n,B->N); 1635 1636 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1637 B->data = (void*)b; 1638 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1639 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1640 B->ops->destroy = MatDestroy_SeqSBAIJ; 1641 B->ops->view = MatView_SeqSBAIJ; 1642 B->factor = 0; 1643 B->lupivotthreshold = 1.0; 1644 B->mapping = 0; 1645 b->row = 0; 1646 b->icol = 0; 1647 b->reallocs = 0; 1648 b->saved_values = 0; 1649 1650 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1651 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1652 1653 b->sorted = PETSC_FALSE; 1654 b->roworiented = PETSC_TRUE; 1655 b->nonew = 0; 1656 b->diag = 0; 1657 b->solve_work = 0; 1658 b->mult_work = 0; 1659 B->spptr = 0; 1660 b->keepzeroedrows = PETSC_FALSE; 1661 b->xtoy = 0; 1662 b->XtoY = 0; 1663 1664 b->inew = 0; 1665 b->jnew = 0; 1666 b->anew = 0; 1667 b->a2anew = 0; 1668 b->permute = PETSC_FALSE; 1669 1670 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1671 "MatStoreValues_SeqSBAIJ", 1672 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1673 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1674 "MatRetrieveValues_SeqSBAIJ", 1675 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1676 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1677 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1678 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1679 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1680 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1681 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1682 PetscFunctionReturn(0); 1683 } 1684 EXTERN_C_END 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1688 /*@C 1689 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1690 compressed row) format. For good matrix assembly performance the 1691 user should preallocate the matrix storage by setting the parameter nz 1692 (or the array nnz). By setting these parameters accurately, performance 1693 during matrix assembly can be increased by more than a factor of 50. 1694 1695 Collective on Mat 1696 1697 Input Parameters: 1698 + A - the symmetric matrix 1699 . bs - size of block 1700 . nz - number of block nonzeros per block row (same for all rows) 1701 - nnz - array containing the number of block nonzeros in the upper triangular plus 1702 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1703 1704 Options Database Keys: 1705 . -mat_no_unroll - uses code that does not unroll the loops in the 1706 block calculations (much slower) 1707 . -mat_block_size - size of the blocks to use 1708 1709 Level: intermediate 1710 1711 Notes: 1712 Specify the preallocated storage with either nz or nnz (not both). 1713 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1714 allocation. For additional details, see the users manual chapter on 1715 matrices. 1716 1717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1718 @*/ 1719 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1720 int ierr,(*f)(Mat,int,int,const int[]); 1721 1722 PetscFunctionBegin; 1723 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1724 if (f) { 1725 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCreateSeqSBAIJ" 1732 /*@C 1733 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1734 compressed row) format. For good matrix assembly performance the 1735 user should preallocate the matrix storage by setting the parameter nz 1736 (or the array nnz). By setting these parameters accurately, performance 1737 during matrix assembly can be increased by more than a factor of 50. 1738 1739 Collective on MPI_Comm 1740 1741 Input Parameters: 1742 + comm - MPI communicator, set to PETSC_COMM_SELF 1743 . bs - size of block 1744 . m - number of rows, or number of columns 1745 . nz - number of block nonzeros per block row (same for all rows) 1746 - nnz - array containing the number of block nonzeros in the upper triangular plus 1747 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1748 1749 Output Parameter: 1750 . A - the symmetric matrix 1751 1752 Options Database Keys: 1753 . -mat_no_unroll - uses code that does not unroll the loops in the 1754 block calculations (much slower) 1755 . -mat_block_size - size of the blocks to use 1756 1757 Level: intermediate 1758 1759 Notes: 1760 1761 Specify the preallocated storage with either nz or nnz (not both). 1762 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1763 allocation. For additional details, see the users manual chapter on 1764 matrices. 1765 1766 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1767 @*/ 1768 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1769 { 1770 int ierr; 1771 1772 PetscFunctionBegin; 1773 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1774 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1775 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 1779 #undef __FUNCT__ 1780 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1781 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1782 { 1783 Mat C; 1784 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1785 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1786 1787 PetscFunctionBegin; 1788 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1789 1790 *B = 0; 1791 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1792 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1793 c = (Mat_SeqSBAIJ*)C->data; 1794 1795 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1796 C->preallocated = PETSC_TRUE; 1797 C->factor = A->factor; 1798 c->row = 0; 1799 c->icol = 0; 1800 c->saved_values = 0; 1801 c->keepzeroedrows = a->keepzeroedrows; 1802 C->assembled = PETSC_TRUE; 1803 1804 c->bs = a->bs; 1805 c->bs2 = a->bs2; 1806 c->mbs = a->mbs; 1807 c->nbs = a->nbs; 1808 1809 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1810 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1811 for (i=0; i<mbs; i++) { 1812 c->imax[i] = a->imax[i]; 1813 c->ilen[i] = a->ilen[i]; 1814 } 1815 1816 /* allocate the matrix space */ 1817 c->singlemalloc = PETSC_TRUE; 1818 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1819 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1820 c->j = (int*)(c->a + nz*bs2); 1821 c->i = c->j + nz; 1822 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1823 if (mbs > 0) { 1824 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1825 if (cpvalues == MAT_COPY_VALUES) { 1826 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1827 } else { 1828 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1829 } 1830 } 1831 1832 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1833 c->sorted = a->sorted; 1834 c->roworiented = a->roworiented; 1835 c->nonew = a->nonew; 1836 1837 if (a->diag) { 1838 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1839 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1840 for (i=0; i<mbs; i++) { 1841 c->diag[i] = a->diag[i]; 1842 } 1843 } else c->diag = 0; 1844 c->s_nz = a->s_nz; 1845 c->s_maxnz = a->s_maxnz; 1846 c->solve_work = 0; 1847 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1848 c->mult_work = 0; 1849 *B = C; 1850 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1856 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1857 { 1858 Mat_SeqSBAIJ *a; 1859 Mat B; 1860 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1861 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1862 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1863 int *masked,nmask,tmp,bs2,ishift; 1864 PetscScalar *aa; 1865 MPI_Comm comm = ((PetscObject)viewer)->comm; 1866 1867 PetscFunctionBegin; 1868 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1869 bs2 = bs*bs; 1870 1871 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1872 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1873 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1874 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1875 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1876 M = header[1]; N = header[2]; nz = header[3]; 1877 1878 if (header[3] < 0) { 1879 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1880 } 1881 1882 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1883 1884 /* 1885 This code adds extra rows to make sure the number of rows is 1886 divisible by the blocksize 1887 */ 1888 mbs = M/bs; 1889 extra_rows = bs - M + bs*(mbs); 1890 if (extra_rows == bs) extra_rows = 0; 1891 else mbs++; 1892 if (extra_rows) { 1893 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1894 } 1895 1896 /* read in row lengths */ 1897 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1898 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1899 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1900 1901 /* read in column indices */ 1902 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1903 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1904 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1905 1906 /* loop over row lengths determining block row lengths */ 1907 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1908 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1909 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1910 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1911 masked = mask + mbs; 1912 rowcount = 0; nzcount = 0; 1913 for (i=0; i<mbs; i++) { 1914 nmask = 0; 1915 for (j=0; j<bs; j++) { 1916 kmax = rowlengths[rowcount]; 1917 for (k=0; k<kmax; k++) { 1918 tmp = jj[nzcount++]/bs; /* block col. index */ 1919 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1920 } 1921 rowcount++; 1922 } 1923 s_browlengths[i] += nmask; 1924 1925 /* zero out the mask elements we set */ 1926 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1927 } 1928 1929 /* create our matrix */ 1930 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1931 ierr = MatSetType(B,type);CHKERRQ(ierr); 1932 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1933 a = (Mat_SeqSBAIJ*)B->data; 1934 1935 /* set matrix "i" values */ 1936 a->i[0] = 0; 1937 for (i=1; i<= mbs; i++) { 1938 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1939 a->ilen[i-1] = s_browlengths[i-1]; 1940 } 1941 a->s_nz = a->i[mbs]; 1942 1943 /* read in nonzero values */ 1944 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1945 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1946 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1947 1948 /* set "a" and "j" values into matrix */ 1949 nzcount = 0; jcount = 0; 1950 for (i=0; i<mbs; i++) { 1951 nzcountb = nzcount; 1952 nmask = 0; 1953 for (j=0; j<bs; j++) { 1954 kmax = rowlengths[i*bs+j]; 1955 for (k=0; k<kmax; k++) { 1956 tmp = jj[nzcount++]/bs; /* block col. index */ 1957 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1958 } 1959 } 1960 /* sort the masked values */ 1961 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1962 1963 /* set "j" values into matrix */ 1964 maskcount = 1; 1965 for (j=0; j<nmask; j++) { 1966 a->j[jcount++] = masked[j]; 1967 mask[masked[j]] = maskcount++; 1968 } 1969 1970 /* set "a" values into matrix */ 1971 ishift = bs2*a->i[i]; 1972 for (j=0; j<bs; j++) { 1973 kmax = rowlengths[i*bs+j]; 1974 for (k=0; k<kmax; k++) { 1975 tmp = jj[nzcountb]/bs ; /* block col. index */ 1976 if (tmp >= i){ 1977 block = mask[tmp] - 1; 1978 point = jj[nzcountb] - bs*tmp; 1979 idx = ishift + bs2*block + j + bs*point; 1980 a->a[idx] = aa[nzcountb]; 1981 } 1982 nzcountb++; 1983 } 1984 } 1985 /* zero out the mask elements we set */ 1986 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1987 } 1988 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1989 1990 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1991 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1992 ierr = PetscFree(aa);CHKERRQ(ierr); 1993 ierr = PetscFree(jj);CHKERRQ(ierr); 1994 ierr = PetscFree(mask);CHKERRQ(ierr); 1995 1996 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1997 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1998 ierr = MatView_Private(B);CHKERRQ(ierr); 1999 *A = B; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatRelax_SeqSBAIJ" 2005 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 2006 { 2007 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2008 MatScalar *aa=a->a,*v,*v1; 2009 PetscScalar *x,*b,*t,sum,d; 2010 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 2011 int nz,nz1,*vj,*vj1,i; 2012 2013 PetscFunctionBegin; 2014 its = its*lits; 2015 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2016 2017 if (bs > 1) 2018 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2019 2020 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2021 if (xx != bb) { 2022 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2023 } else { 2024 b = x; 2025 } 2026 2027 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 2028 2029 if (flag & SOR_ZERO_INITIAL_GUESS) { 2030 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2031 for (i=0; i<m; i++) 2032 t[i] = b[i]; 2033 2034 for (i=0; i<m; i++){ 2035 d = *(aa + ai[i]); /* diag[i] */ 2036 v = aa + ai[i] + 1; 2037 vj = aj + ai[i] + 1; 2038 nz = ai[i+1] - ai[i] - 1; 2039 x[i] = omega*t[i]/d; 2040 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2041 PetscLogFlops(2*nz-1); 2042 } 2043 } 2044 2045 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2046 for (i=0; i<m; i++) 2047 t[i] = b[i]; 2048 2049 for (i=0; i<m-1; i++){ /* update rhs */ 2050 v = aa + ai[i] + 1; 2051 vj = aj + ai[i] + 1; 2052 nz = ai[i+1] - ai[i] - 1; 2053 while (nz--) t[*vj++] -= x[i]*(*v++); 2054 PetscLogFlops(2*nz-1); 2055 } 2056 for (i=m-1; i>=0; i--){ 2057 d = *(aa + ai[i]); 2058 v = aa + ai[i] + 1; 2059 vj = aj + ai[i] + 1; 2060 nz = ai[i+1] - ai[i] - 1; 2061 sum = t[i]; 2062 while (nz--) sum -= x[*vj++]*(*v++); 2063 PetscLogFlops(2*nz-1); 2064 x[i] = (1-omega)*x[i] + omega*sum/d; 2065 } 2066 } 2067 its--; 2068 } 2069 2070 while (its--) { 2071 /* 2072 forward sweep: 2073 for i=0,...,m-1: 2074 sum[i] = (b[i] - U(i,:)x )/d[i]; 2075 x[i] = (1-omega)x[i] + omega*sum[i]; 2076 b = b - x[i]*U^T(i,:); 2077 2078 */ 2079 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2080 for (i=0; i<m; i++) 2081 t[i] = b[i]; 2082 2083 for (i=0; i<m; i++){ 2084 d = *(aa + ai[i]); /* diag[i] */ 2085 v = aa + ai[i] + 1; v1=v; 2086 vj = aj + ai[i] + 1; vj1=vj; 2087 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2088 sum = t[i]; 2089 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2090 x[i] = (1-omega)*x[i] + omega*sum/d; 2091 while (nz--) t[*vj++] -= x[i]*(*v++); 2092 PetscLogFlops(4*nz-2); 2093 } 2094 } 2095 2096 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2097 /* 2098 backward sweep: 2099 b = b - x[i]*U^T(i,:), i=0,...,n-2 2100 for i=m-1,...,0: 2101 sum[i] = (b[i] - U(i,:)x )/d[i]; 2102 x[i] = (1-omega)x[i] + omega*sum[i]; 2103 */ 2104 for (i=0; i<m; i++) 2105 t[i] = b[i]; 2106 2107 for (i=0; i<m-1; i++){ /* update rhs */ 2108 v = aa + ai[i] + 1; 2109 vj = aj + ai[i] + 1; 2110 nz = ai[i+1] - ai[i] - 1; 2111 while (nz--) t[*vj++] -= x[i]*(*v++); 2112 PetscLogFlops(2*nz-1); 2113 } 2114 for (i=m-1; i>=0; i--){ 2115 d = *(aa + ai[i]); 2116 v = aa + ai[i] + 1; 2117 vj = aj + ai[i] + 1; 2118 nz = ai[i+1] - ai[i] - 1; 2119 sum = t[i]; 2120 while (nz--) sum -= x[*vj++]*(*v++); 2121 PetscLogFlops(2*nz-1); 2122 x[i] = (1-omega)*x[i] + omega*sum/d; 2123 } 2124 } 2125 } 2126 2127 ierr = PetscFree(t); CHKERRQ(ierr); 2128 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2129 if (bb != xx) { 2130 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2131 } 2132 2133 PetscFunctionReturn(0); 2134 } 2135 2136 2137 2138 2139 2140 2141