1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 #if defined(PETSC_HAVE_DSCPACK) 25 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 26 #endif 27 28 #define CHUNKSIZE 10 29 30 /* 31 Checks for missing diagonals 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 35 int MatMissingDiagonal_SeqBAIJ(Mat A) 36 { 37 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38 int *diag,*jj = a->j,i,ierr; 39 40 PetscFunctionBegin; 41 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 42 diag = a->diag; 43 for (i=0; i<a->mbs; i++) { 44 if (jj[diag[i]] != i) { 45 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 46 } 47 } 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 53 int MatMarkDiagonal_SeqBAIJ(Mat A) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 int i,j,*diag,m = a->mbs,ierr; 57 58 PetscFunctionBegin; 59 if (a->diag) PetscFunctionReturn(0); 60 61 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 62 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 63 for (i=0; i<m; i++) { 64 diag[i] = a->i[i+1]; 65 for (j=a->i[i]; j<a->i[i+1]; j++) { 66 if (a->j[j] == i) { 67 diag[i] = j; 68 break; 69 } 70 } 71 } 72 a->diag = diag; 73 PetscFunctionReturn(0); 74 } 75 76 77 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 81 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 82 { 83 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 84 int ierr,n = a->mbs,i; 85 86 PetscFunctionBegin; 87 *nn = n; 88 if (!ia) PetscFunctionReturn(0); 89 if (symmetric) { 90 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 91 } else if (oshift == 1) { 92 /* temporarily add 1 to i and j indices */ 93 int nz = a->i[n]; 94 for (i=0; i<nz; i++) a->j[i]++; 95 for (i=0; i<n+1; i++) a->i[i]++; 96 *ia = a->i; *ja = a->j; 97 } else { 98 *ia = a->i; *ja = a->j; 99 } 100 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 106 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 107 { 108 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 109 int i,n = a->mbs,ierr; 110 111 PetscFunctionBegin; 112 if (!ia) PetscFunctionReturn(0); 113 if (symmetric) { 114 ierr = PetscFree(*ia);CHKERRQ(ierr); 115 ierr = PetscFree(*ja);CHKERRQ(ierr); 116 } else if (oshift == 1) { 117 int nz = a->i[n]-1; 118 for (i=0; i<nz; i++) a->j[i]--; 119 for (i=0; i<n+1; i++) a->i[i]--; 120 } 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 126 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 127 { 128 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 129 130 PetscFunctionBegin; 131 *bs = baij->bs; 132 PetscFunctionReturn(0); 133 } 134 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatDestroy_SeqBAIJ" 138 int MatDestroy_SeqBAIJ(Mat A) 139 { 140 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 141 int ierr; 142 143 PetscFunctionBegin; 144 #if defined(PETSC_USE_LOG) 145 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 146 #endif 147 ierr = PetscFree(a->a);CHKERRQ(ierr); 148 if (!a->singlemalloc) { 149 ierr = PetscFree(a->i);CHKERRQ(ierr); 150 ierr = PetscFree(a->j);CHKERRQ(ierr); 151 } 152 if (a->row) { 153 ierr = ISDestroy(a->row);CHKERRQ(ierr); 154 } 155 if (a->col) { 156 ierr = ISDestroy(a->col);CHKERRQ(ierr); 157 } 158 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 159 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 160 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 161 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 162 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 163 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 164 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 165 #if defined(PETSC_USE_MAT_SINGLE) 166 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 167 #endif 168 ierr = PetscFree(a);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatSetOption_SeqBAIJ" 174 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 175 { 176 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 177 178 PetscFunctionBegin; 179 switch (op) { 180 case MAT_ROW_ORIENTED: 181 a->roworiented = PETSC_TRUE; 182 break; 183 case MAT_COLUMN_ORIENTED: 184 a->roworiented = PETSC_FALSE; 185 break; 186 case MAT_COLUMNS_SORTED: 187 a->sorted = PETSC_TRUE; 188 break; 189 case MAT_COLUMNS_UNSORTED: 190 a->sorted = PETSC_FALSE; 191 break; 192 case MAT_KEEP_ZEROED_ROWS: 193 a->keepzeroedrows = PETSC_TRUE; 194 break; 195 case MAT_NO_NEW_NONZERO_LOCATIONS: 196 a->nonew = 1; 197 break; 198 case MAT_NEW_NONZERO_LOCATION_ERR: 199 a->nonew = -1; 200 break; 201 case MAT_NEW_NONZERO_ALLOCATION_ERR: 202 a->nonew = -2; 203 break; 204 case MAT_YES_NEW_NONZERO_LOCATIONS: 205 a->nonew = 0; 206 break; 207 case MAT_ROWS_SORTED: 208 case MAT_ROWS_UNSORTED: 209 case MAT_YES_NEW_DIAGONALS: 210 case MAT_IGNORE_OFF_PROC_ENTRIES: 211 case MAT_USE_HASH_TABLE: 212 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 213 break; 214 case MAT_NO_NEW_DIAGONALS: 215 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 216 case MAT_USE_SINGLE_PRECISION_SOLVES: 217 if (a->bs==4) { 218 PetscTruth sse_enabled_local,sse_enabled_global; 219 int ierr; 220 221 sse_enabled_local = PETSC_FALSE; 222 sse_enabled_global = PETSC_FALSE; 223 224 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 225 #if defined(PETSC_HAVE_SSE) 226 if (sse_enabled_local) { 227 a->single_precision_solves = PETSC_TRUE; 228 A->ops->solve = MatSolve_SeqBAIJ_Update; 229 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 230 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 231 break; 232 } else { 233 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 234 } 235 #else 236 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 237 #endif 238 } 239 break; 240 default: 241 SETERRQ(PETSC_ERR_SUP,"unknown option"); 242 } 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatGetRow_SeqBAIJ" 248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 252 MatScalar *aa,*aa_i; 253 PetscScalar *v_i; 254 255 PetscFunctionBegin; 256 bs = a->bs; 257 ai = a->i; 258 aj = a->j; 259 aa = a->a; 260 bs2 = a->bs2; 261 262 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 263 264 bn = row/bs; /* Block number */ 265 bp = row % bs; /* Block Position */ 266 M = ai[bn+1] - ai[bn]; 267 *nz = bs*M; 268 269 if (v) { 270 *v = 0; 271 if (*nz) { 272 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 273 for (i=0; i<M; i++) { /* for each block in the block row */ 274 v_i = *v + i*bs; 275 aa_i = aa + bs2*(ai[bn] + i); 276 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 277 } 278 } 279 } 280 281 if (idx) { 282 *idx = 0; 283 if (*nz) { 284 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 285 for (i=0; i<M; i++) { /* for each block in the block row */ 286 idx_i = *idx + i*bs; 287 itmp = bs*aj[ai[bn] + i]; 288 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 289 } 290 } 291 } 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 298 { 299 int ierr; 300 301 PetscFunctionBegin; 302 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 303 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqBAIJ" 309 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 310 { 311 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 312 Mat C; 313 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 314 int *rows,*cols,bs2=a->bs2; 315 PetscScalar *array; 316 317 PetscFunctionBegin; 318 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 319 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 320 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 321 322 #if defined(PETSC_USE_MAT_SINGLE) 323 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 324 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 325 #else 326 array = a->a; 327 #endif 328 329 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 330 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 331 ierr = PetscFree(col);CHKERRQ(ierr); 332 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 333 cols = rows + bs; 334 for (i=0; i<mbs; i++) { 335 cols[0] = i*bs; 336 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 337 len = ai[i+1] - ai[i]; 338 for (j=0; j<len; j++) { 339 rows[0] = (*aj++)*bs; 340 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 341 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 342 array += bs2; 343 } 344 } 345 ierr = PetscFree(rows);CHKERRQ(ierr); 346 #if defined(PETSC_USE_MAT_SINGLE) 347 ierr = PetscFree(array); 348 #endif 349 350 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 351 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 353 if (B) { 354 *B = C; 355 } else { 356 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 364 { 365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 366 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 367 PetscScalar *aa; 368 FILE *file; 369 370 PetscFunctionBegin; 371 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 372 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 373 col_lens[0] = MAT_FILE_COOKIE; 374 375 col_lens[1] = A->m; 376 col_lens[2] = A->n; 377 col_lens[3] = a->nz*bs2; 378 379 /* store lengths of each row and write (including header) to file */ 380 count = 0; 381 for (i=0; i<a->mbs; i++) { 382 for (j=0; j<bs; j++) { 383 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 384 } 385 } 386 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 387 ierr = PetscFree(col_lens);CHKERRQ(ierr); 388 389 /* store column indices (zero start index) */ 390 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 391 count = 0; 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 for (k=a->i[i]; k<a->i[i+1]; k++) { 395 for (l=0; l<bs; l++) { 396 jj[count++] = bs*a->j[k] + l; 397 } 398 } 399 } 400 } 401 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 402 ierr = PetscFree(jj);CHKERRQ(ierr); 403 404 /* store nonzero values */ 405 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 406 count = 0; 407 for (i=0; i<a->mbs; i++) { 408 for (j=0; j<bs; j++) { 409 for (k=a->i[i]; k<a->i[i+1]; k++) { 410 for (l=0; l<bs; l++) { 411 aa[count++] = a->a[bs2*k + l*bs + j]; 412 } 413 } 414 } 415 } 416 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 417 ierr = PetscFree(aa);CHKERRQ(ierr); 418 419 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 420 if (file) { 421 fprintf(file,"-matload_block_size %d\n",a->bs); 422 } 423 PetscFunctionReturn(0); 424 } 425 426 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 442 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 443 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 444 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 445 #endif 446 PetscFunctionReturn(0); 447 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 448 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 449 for (i=0; i<a->mbs; i++) { 450 for (j=0; j<bs; j++) { 451 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 452 for (k=a->i[i]; k<a->i[i+1]; k++) { 453 for (l=0; l<bs; l++) { 454 #if defined(PETSC_USE_COMPLEX) 455 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 456 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 457 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 459 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 460 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 461 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 462 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 463 } 464 #else 465 if (a->a[bs2*k + l*bs + j] != 0.0) { 466 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 467 } 468 #endif 469 } 470 } 471 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 472 } 473 } 474 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 475 } else { 476 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 477 for (i=0; i<a->mbs; i++) { 478 for (j=0; j<bs; j++) { 479 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 480 for (k=a->i[i]; k<a->i[i+1]; k++) { 481 for (l=0; l<bs; l++) { 482 #if defined(PETSC_USE_COMPLEX) 483 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 484 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 485 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 487 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 488 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 489 } else { 490 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 491 } 492 #else 493 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 494 #endif 495 } 496 } 497 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 498 } 499 } 500 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 501 } 502 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 508 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 509 { 510 Mat A = (Mat) Aa; 511 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 512 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 513 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 514 MatScalar *aa; 515 PetscViewer viewer; 516 517 PetscFunctionBegin; 518 519 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 520 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 521 522 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 523 524 /* loop over matrix elements drawing boxes */ 525 color = PETSC_DRAW_BLUE; 526 for (i=0,row=0; i<mbs; i++,row+=bs) { 527 for (j=a->i[i]; j<a->i[i+1]; j++) { 528 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 529 x_l = a->j[j]*bs; x_r = x_l + 1.0; 530 aa = a->a + j*bs2; 531 for (k=0; k<bs; k++) { 532 for (l=0; l<bs; l++) { 533 if (PetscRealPart(*aa++) >= 0.) continue; 534 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 535 } 536 } 537 } 538 } 539 color = PETSC_DRAW_CYAN; 540 for (i=0,row=0; i<mbs; i++,row+=bs) { 541 for (j=a->i[i]; j<a->i[i+1]; j++) { 542 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 543 x_l = a->j[j]*bs; x_r = x_l + 1.0; 544 aa = a->a + j*bs2; 545 for (k=0; k<bs; k++) { 546 for (l=0; l<bs; l++) { 547 if (PetscRealPart(*aa++) != 0.) continue; 548 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 549 } 550 } 551 } 552 } 553 554 color = PETSC_DRAW_RED; 555 for (i=0,row=0; i<mbs; i++,row+=bs) { 556 for (j=a->i[i]; j<a->i[i+1]; j++) { 557 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 558 x_l = a->j[j]*bs; x_r = x_l + 1.0; 559 aa = a->a + j*bs2; 560 for (k=0; k<bs; k++) { 561 for (l=0; l<bs; l++) { 562 if (PetscRealPart(*aa++) <= 0.) continue; 563 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 564 } 565 } 566 } 567 } 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 573 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 574 { 575 int ierr; 576 PetscReal xl,yl,xr,yr,w,h; 577 PetscDraw draw; 578 PetscTruth isnull; 579 580 PetscFunctionBegin; 581 582 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 583 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 584 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 586 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 587 xr += w; yr += h; xl = -w; yl = -h; 588 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 589 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 590 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatView_SeqBAIJ" 596 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 597 { 598 int ierr; 599 PetscTruth issocket,isascii,isbinary,isdraw; 600 601 PetscFunctionBegin; 602 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 603 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 604 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 605 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 606 if (issocket) { 607 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 608 } else if (isascii){ 609 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 610 } else if (isbinary) { 611 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 612 } else if (isdraw) { 613 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 614 } else { 615 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 621 #undef __FUNCT__ 622 #define __FUNCT__ "MatGetValues_SeqBAIJ" 623 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 624 { 625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 626 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 627 int *ai = a->i,*ailen = a->ilen; 628 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 629 MatScalar *ap,*aa = a->a,zero = 0.0; 630 631 PetscFunctionBegin; 632 for (k=0; k<m; k++) { /* loop over rows */ 633 row = im[k]; brow = row/bs; 634 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 635 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 636 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 637 nrow = ailen[brow]; 638 for (l=0; l<n; l++) { /* loop over columns */ 639 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 640 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 641 col = in[l] ; 642 bcol = col/bs; 643 cidx = col%bs; 644 ridx = row%bs; 645 high = nrow; 646 low = 0; /* assume unsorted */ 647 while (high-low > 5) { 648 t = (low+high)/2; 649 if (rp[t] > bcol) high = t; 650 else low = t; 651 } 652 for (i=low; i<high; i++) { 653 if (rp[i] > bcol) break; 654 if (rp[i] == bcol) { 655 *v++ = ap[bs2*i+bs*cidx+ridx]; 656 goto finished; 657 } 658 } 659 *v++ = zero; 660 finished:; 661 } 662 } 663 PetscFunctionReturn(0); 664 } 665 666 #if defined(PETSC_USE_MAT_SINGLE) 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 669 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 670 { 671 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 672 int ierr,i,N = m*n*b->bs2; 673 MatScalar *vsingle; 674 675 PetscFunctionBegin; 676 if (N > b->setvalueslen) { 677 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 678 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 679 b->setvalueslen = N; 680 } 681 vsingle = b->setvaluescopy; 682 for (i=0; i<N; i++) { 683 vsingle[i] = v[i]; 684 } 685 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 #endif 689 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 693 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 694 { 695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 696 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 697 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 698 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 699 PetscTruth roworiented=a->roworiented; 700 MatScalar *value = v,*ap,*aa = a->a,*bap; 701 702 PetscFunctionBegin; 703 if (roworiented) { 704 stepval = (n-1)*bs; 705 } else { 706 stepval = (m-1)*bs; 707 } 708 for (k=0; k<m; k++) { /* loop over added rows */ 709 row = im[k]; 710 if (row < 0) continue; 711 #if defined(PETSC_USE_BOPT_g) 712 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 713 #endif 714 rp = aj + ai[row]; 715 ap = aa + bs2*ai[row]; 716 rmax = imax[row]; 717 nrow = ailen[row]; 718 low = 0; 719 for (l=0; l<n; l++) { /* loop over added columns */ 720 if (in[l] < 0) continue; 721 #if defined(PETSC_USE_BOPT_g) 722 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 723 #endif 724 col = in[l]; 725 if (roworiented) { 726 value = v + k*(stepval+bs)*bs + l*bs; 727 } else { 728 value = v + l*(stepval+bs)*bs + k*bs; 729 } 730 if (!sorted) low = 0; high = nrow; 731 while (high-low > 7) { 732 t = (low+high)/2; 733 if (rp[t] > col) high = t; 734 else low = t; 735 } 736 for (i=low; i<high; i++) { 737 if (rp[i] > col) break; 738 if (rp[i] == col) { 739 bap = ap + bs2*i; 740 if (roworiented) { 741 if (is == ADD_VALUES) { 742 for (ii=0; ii<bs; ii++,value+=stepval) { 743 for (jj=ii; jj<bs2; jj+=bs) { 744 bap[jj] += *value++; 745 } 746 } 747 } else { 748 for (ii=0; ii<bs; ii++,value+=stepval) { 749 for (jj=ii; jj<bs2; jj+=bs) { 750 bap[jj] = *value++; 751 } 752 } 753 } 754 } else { 755 if (is == ADD_VALUES) { 756 for (ii=0; ii<bs; ii++,value+=stepval) { 757 for (jj=0; jj<bs; jj++) { 758 *bap++ += *value++; 759 } 760 } 761 } else { 762 for (ii=0; ii<bs; ii++,value+=stepval) { 763 for (jj=0; jj<bs; jj++) { 764 *bap++ = *value++; 765 } 766 } 767 } 768 } 769 goto noinsert2; 770 } 771 } 772 if (nonew == 1) goto noinsert2; 773 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 774 if (nrow >= rmax) { 775 /* there is no extra room in row, therefore enlarge */ 776 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 777 MatScalar *new_a; 778 779 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 780 781 /* malloc new storage space */ 782 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 783 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 784 new_j = (int*)(new_a + bs2*new_nz); 785 new_i = new_j + new_nz; 786 787 /* copy over old data into new slots */ 788 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 789 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 790 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 791 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 792 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 793 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 794 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 795 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 796 /* free up old matrix storage */ 797 ierr = PetscFree(a->a);CHKERRQ(ierr); 798 if (!a->singlemalloc) { 799 ierr = PetscFree(a->i);CHKERRQ(ierr); 800 ierr = PetscFree(a->j);CHKERRQ(ierr); 801 } 802 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 803 a->singlemalloc = PETSC_TRUE; 804 805 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 806 rmax = imax[row] = imax[row] + CHUNKSIZE; 807 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 808 a->maxnz += bs2*CHUNKSIZE; 809 a->reallocs++; 810 a->nz++; 811 } 812 N = nrow++ - 1; 813 /* shift up all the later entries in this row */ 814 for (ii=N; ii>=i; ii--) { 815 rp[ii+1] = rp[ii]; 816 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 817 } 818 if (N >= i) { 819 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 820 } 821 rp[i] = col; 822 bap = ap + bs2*i; 823 if (roworiented) { 824 for (ii=0; ii<bs; ii++,value+=stepval) { 825 for (jj=ii; jj<bs2; jj+=bs) { 826 bap[jj] = *value++; 827 } 828 } 829 } else { 830 for (ii=0; ii<bs; ii++,value+=stepval) { 831 for (jj=0; jj<bs; jj++) { 832 *bap++ = *value++; 833 } 834 } 835 } 836 noinsert2:; 837 low = i; 838 } 839 ailen[row] = nrow; 840 } 841 PetscFunctionReturn(0); 842 } 843 844 /* EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); */ 845 #undef __FUNCT__ 846 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 847 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 848 { 849 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 850 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 851 int m = A->m,*ip,N,*ailen = a->ilen; 852 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 853 MatScalar *aa = a->a,*ap; 854 #if defined(PETSC_HAVE_DSCPACK) 855 PetscTruth flag; 856 #endif 857 858 PetscFunctionBegin; 859 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 860 861 if (m) rmax = ailen[0]; 862 for (i=1; i<mbs; i++) { 863 /* move each row back by the amount of empty slots (fshift) before it*/ 864 fshift += imax[i-1] - ailen[i-1]; 865 rmax = PetscMax(rmax,ailen[i]); 866 if (fshift) { 867 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 868 N = ailen[i]; 869 for (j=0; j<N; j++) { 870 ip[j-fshift] = ip[j]; 871 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 872 } 873 } 874 ai[i] = ai[i-1] + ailen[i-1]; 875 } 876 if (mbs) { 877 fshift += imax[mbs-1] - ailen[mbs-1]; 878 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 879 } 880 /* reset ilen and imax for each row */ 881 for (i=0; i<mbs; i++) { 882 ailen[i] = imax[i] = ai[i+1] - ai[i]; 883 } 884 a->nz = ai[mbs]; 885 886 /* diagonals may have moved, so kill the diagonal pointers */ 887 if (fshift && a->diag) { 888 ierr = PetscFree(a->diag);CHKERRQ(ierr); 889 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 890 a->diag = 0; 891 } 892 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 893 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 894 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 895 a->reallocs = 0; 896 A->info.nz_unneeded = (PetscReal)fshift*bs2; 897 #if defined(PETSC_HAVE_DSCPACK) 898 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 899 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 900 #endif 901 902 903 PetscFunctionReturn(0); 904 } 905 906 907 908 /* 909 This function returns an array of flags which indicate the locations of contiguous 910 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 911 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 912 Assume: sizes should be long enough to hold all the values. 913 */ 914 #undef __FUNCT__ 915 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 916 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 917 { 918 int i,j,k,row; 919 PetscTruth flg; 920 921 PetscFunctionBegin; 922 for (i=0,j=0; i<n; j++) { 923 row = idx[i]; 924 if (row%bs!=0) { /* Not the begining of a block */ 925 sizes[j] = 1; 926 i++; 927 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 928 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 929 i++; 930 } else { /* Begining of the block, so check if the complete block exists */ 931 flg = PETSC_TRUE; 932 for (k=1; k<bs; k++) { 933 if (row+k != idx[i+k]) { /* break in the block */ 934 flg = PETSC_FALSE; 935 break; 936 } 937 } 938 if (flg == PETSC_TRUE) { /* No break in the bs */ 939 sizes[j] = bs; 940 i+= bs; 941 } else { 942 sizes[j] = 1; 943 i++; 944 } 945 } 946 } 947 *bs_max = j; 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 953 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 954 { 955 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 956 int ierr,i,j,k,count,is_n,*is_idx,*rows; 957 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 958 PetscScalar zero = 0.0; 959 MatScalar *aa; 960 961 PetscFunctionBegin; 962 /* Make a copy of the IS and sort it */ 963 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 964 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 965 966 /* allocate memory for rows,sizes */ 967 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 968 sizes = rows + is_n; 969 970 /* copy IS values to rows, and sort them */ 971 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 972 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 973 if (baij->keepzeroedrows) { 974 for (i=0; i<is_n; i++) { sizes[i] = 1; } 975 bs_max = is_n; 976 } else { 977 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 978 } 979 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 980 981 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 982 row = rows[j]; 983 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 984 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 985 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 986 if (sizes[i] == bs && !baij->keepzeroedrows) { 987 if (diag) { 988 if (baij->ilen[row/bs] > 0) { 989 baij->ilen[row/bs] = 1; 990 baij->j[baij->i[row/bs]] = row/bs; 991 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 992 } 993 /* Now insert all the diagonal values for this bs */ 994 for (k=0; k<bs; k++) { 995 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 996 } 997 } else { /* (!diag) */ 998 baij->ilen[row/bs] = 0; 999 } /* end (!diag) */ 1000 } else { /* (sizes[i] != bs) */ 1001 #if defined (PETSC_USE_DEBUG) 1002 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1003 #endif 1004 for (k=0; k<count; k++) { 1005 aa[0] = zero; 1006 aa += bs; 1007 } 1008 if (diag) { 1009 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1010 } 1011 } 1012 } 1013 1014 ierr = PetscFree(rows);CHKERRQ(ierr); 1015 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1021 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1022 { 1023 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1024 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1025 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1026 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1027 int ridx,cidx,bs2=a->bs2,ierr; 1028 PetscTruth roworiented=a->roworiented; 1029 MatScalar *ap,value,*aa=a->a,*bap; 1030 1031 PetscFunctionBegin; 1032 for (k=0; k<m; k++) { /* loop over added rows */ 1033 row = im[k]; brow = row/bs; 1034 if (row < 0) continue; 1035 #if defined(PETSC_USE_BOPT_g) 1036 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1037 #endif 1038 rp = aj + ai[brow]; 1039 ap = aa + bs2*ai[brow]; 1040 rmax = imax[brow]; 1041 nrow = ailen[brow]; 1042 low = 0; 1043 for (l=0; l<n; l++) { /* loop over added columns */ 1044 if (in[l] < 0) continue; 1045 #if defined(PETSC_USE_BOPT_g) 1046 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1047 #endif 1048 col = in[l]; bcol = col/bs; 1049 ridx = row % bs; cidx = col % bs; 1050 if (roworiented) { 1051 value = v[l + k*n]; 1052 } else { 1053 value = v[k + l*m]; 1054 } 1055 if (!sorted) low = 0; high = nrow; 1056 while (high-low > 7) { 1057 t = (low+high)/2; 1058 if (rp[t] > bcol) high = t; 1059 else low = t; 1060 } 1061 for (i=low; i<high; i++) { 1062 if (rp[i] > bcol) break; 1063 if (rp[i] == bcol) { 1064 bap = ap + bs2*i + bs*cidx + ridx; 1065 if (is == ADD_VALUES) *bap += value; 1066 else *bap = value; 1067 goto noinsert1; 1068 } 1069 } 1070 if (nonew == 1) goto noinsert1; 1071 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1072 if (nrow >= rmax) { 1073 /* there is no extra room in row, therefore enlarge */ 1074 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1075 MatScalar *new_a; 1076 1077 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1078 1079 /* Malloc new storage space */ 1080 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1081 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1082 new_j = (int*)(new_a + bs2*new_nz); 1083 new_i = new_j + new_nz; 1084 1085 /* copy over old data into new slots */ 1086 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1087 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1088 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1089 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1090 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1091 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1092 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1093 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1094 /* free up old matrix storage */ 1095 ierr = PetscFree(a->a);CHKERRQ(ierr); 1096 if (!a->singlemalloc) { 1097 ierr = PetscFree(a->i);CHKERRQ(ierr); 1098 ierr = PetscFree(a->j);CHKERRQ(ierr); 1099 } 1100 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1101 a->singlemalloc = PETSC_TRUE; 1102 1103 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1104 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1105 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1106 a->maxnz += bs2*CHUNKSIZE; 1107 a->reallocs++; 1108 a->nz++; 1109 } 1110 N = nrow++ - 1; 1111 /* shift up all the later entries in this row */ 1112 for (ii=N; ii>=i; ii--) { 1113 rp[ii+1] = rp[ii]; 1114 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1115 } 1116 if (N>=i) { 1117 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1118 } 1119 rp[i] = bcol; 1120 ap[bs2*i + bs*cidx + ridx] = value; 1121 noinsert1:; 1122 low = i; 1123 } 1124 ailen[brow] = nrow; 1125 } 1126 PetscFunctionReturn(0); 1127 } 1128 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1132 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1133 { 1134 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1135 Mat outA; 1136 int ierr; 1137 PetscTruth row_identity,col_identity; 1138 1139 PetscFunctionBegin; 1140 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1141 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1142 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1143 if (!row_identity || !col_identity) { 1144 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1145 } 1146 1147 outA = inA; 1148 inA->factor = FACTOR_LU; 1149 1150 if (!a->diag) { 1151 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1152 } 1153 1154 a->row = row; 1155 a->col = col; 1156 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1157 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1158 1159 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1160 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1161 PetscLogObjectParent(inA,a->icol); 1162 1163 /* 1164 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1165 for ILU(0) factorization with natural ordering 1166 */ 1167 if (a->bs < 8) { 1168 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1169 } else { 1170 if (!a->solve_work) { 1171 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1172 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1173 } 1174 } 1175 1176 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1177 1178 PetscFunctionReturn(0); 1179 } 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1182 int MatPrintHelp_SeqBAIJ(Mat A) 1183 { 1184 static PetscTruth called = PETSC_FALSE; 1185 MPI_Comm comm = A->comm; 1186 int ierr; 1187 1188 PetscFunctionBegin; 1189 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1190 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1191 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 EXTERN_C_BEGIN 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1198 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1199 { 1200 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1201 int i,nz,nbs; 1202 1203 PetscFunctionBegin; 1204 nz = baij->maxnz/baij->bs2; 1205 nbs = baij->nbs; 1206 for (i=0; i<nz; i++) { 1207 baij->j[i] = indices[i]; 1208 } 1209 baij->nz = nz; 1210 for (i=0; i<nbs; i++) { 1211 baij->ilen[i] = baij->imax[i]; 1212 } 1213 1214 PetscFunctionReturn(0); 1215 } 1216 EXTERN_C_END 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1220 /*@ 1221 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1222 in the matrix. 1223 1224 Input Parameters: 1225 + mat - the SeqBAIJ matrix 1226 - indices - the column indices 1227 1228 Level: advanced 1229 1230 Notes: 1231 This can be called if you have precomputed the nonzero structure of the 1232 matrix and want to provide it to the matrix object to improve the performance 1233 of the MatSetValues() operation. 1234 1235 You MUST have set the correct numbers of nonzeros per row in the call to 1236 MatCreateSeqBAIJ(). 1237 1238 MUST be called before any calls to MatSetValues(); 1239 1240 @*/ 1241 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1242 { 1243 int ierr,(*f)(Mat,int *); 1244 1245 PetscFunctionBegin; 1246 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1247 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1248 if (f) { 1249 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1250 } else { 1251 SETERRQ(1,"Wrong type of matrix to set column indices"); 1252 } 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1258 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1259 { 1260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1261 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1262 PetscReal atmp; 1263 PetscScalar *x,zero = 0.0; 1264 MatScalar *aa; 1265 int ncols,brow,krow,kcol; 1266 1267 PetscFunctionBegin; 1268 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1269 bs = a->bs; 1270 aa = a->a; 1271 ai = a->i; 1272 aj = a->j; 1273 mbs = a->mbs; 1274 1275 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1276 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1277 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1278 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1279 for (i=0; i<mbs; i++) { 1280 ncols = ai[1] - ai[0]; ai++; 1281 brow = bs*i; 1282 for (j=0; j<ncols; j++){ 1283 /* bcol = bs*(*aj); */ 1284 for (kcol=0; kcol<bs; kcol++){ 1285 for (krow=0; krow<bs; krow++){ 1286 atmp = PetscAbsScalar(*aa); aa++; 1287 row = brow + krow; /* row index */ 1288 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1289 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1290 } 1291 } 1292 aj++; 1293 } 1294 } 1295 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1301 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1302 { 1303 int ierr; 1304 1305 PetscFunctionBegin; 1306 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1312 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1313 { 1314 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1315 PetscFunctionBegin; 1316 *array = a->a; 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1322 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1323 { 1324 PetscFunctionBegin; 1325 PetscFunctionReturn(0); 1326 } 1327 1328 /* -------------------------------------------------------------------*/ 1329 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1330 MatGetRow_SeqBAIJ, 1331 MatRestoreRow_SeqBAIJ, 1332 MatMult_SeqBAIJ_N, 1333 MatMultAdd_SeqBAIJ_N, 1334 MatMultTranspose_SeqBAIJ, 1335 MatMultTransposeAdd_SeqBAIJ, 1336 MatSolve_SeqBAIJ_N, 1337 0, 1338 0, 1339 0, 1340 MatLUFactor_SeqBAIJ, 1341 0, 1342 0, 1343 MatTranspose_SeqBAIJ, 1344 MatGetInfo_SeqBAIJ, 1345 MatEqual_SeqBAIJ, 1346 MatGetDiagonal_SeqBAIJ, 1347 MatDiagonalScale_SeqBAIJ, 1348 MatNorm_SeqBAIJ, 1349 0, 1350 MatAssemblyEnd_SeqBAIJ, 1351 0, 1352 MatSetOption_SeqBAIJ, 1353 MatZeroEntries_SeqBAIJ, 1354 MatZeroRows_SeqBAIJ, 1355 MatLUFactorSymbolic_SeqBAIJ, 1356 MatLUFactorNumeric_SeqBAIJ_N, 1357 0, 1358 0, 1359 MatSetUpPreallocation_SeqBAIJ, 1360 MatILUFactorSymbolic_SeqBAIJ, 1361 0, 1362 MatGetArray_SeqBAIJ, 1363 MatRestoreArray_SeqBAIJ, 1364 MatDuplicate_SeqBAIJ, 1365 0, 1366 0, 1367 MatILUFactor_SeqBAIJ, 1368 0, 1369 0, 1370 MatGetSubMatrices_SeqBAIJ, 1371 MatIncreaseOverlap_SeqBAIJ, 1372 MatGetValues_SeqBAIJ, 1373 0, 1374 MatPrintHelp_SeqBAIJ, 1375 MatScale_SeqBAIJ, 1376 0, 1377 0, 1378 0, 1379 MatGetBlockSize_SeqBAIJ, 1380 MatGetRowIJ_SeqBAIJ, 1381 MatRestoreRowIJ_SeqBAIJ, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 0, 1388 MatSetValuesBlocked_SeqBAIJ, 1389 MatGetSubMatrix_SeqBAIJ, 1390 MatDestroy_SeqBAIJ, 1391 MatView_SeqBAIJ, 1392 MatGetPetscMaps_Petsc, 1393 0, 1394 0, 1395 0, 1396 0, 1397 0, 1398 0, 1399 MatGetRowMax_SeqBAIJ, 1400 MatConvert_Basic}; 1401 1402 EXTERN_C_BEGIN 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1405 int MatStoreValues_SeqBAIJ(Mat mat) 1406 { 1407 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1408 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1409 int ierr; 1410 1411 PetscFunctionBegin; 1412 if (aij->nonew != 1) { 1413 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1414 } 1415 1416 /* allocate space for values if not already there */ 1417 if (!aij->saved_values) { 1418 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1419 } 1420 1421 /* copy values over */ 1422 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 EXTERN_C_END 1426 1427 EXTERN_C_BEGIN 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1430 int MatRetrieveValues_SeqBAIJ(Mat mat) 1431 { 1432 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1433 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1434 1435 PetscFunctionBegin; 1436 if (aij->nonew != 1) { 1437 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1438 } 1439 if (!aij->saved_values) { 1440 SETERRQ(1,"Must call MatStoreValues(A);first"); 1441 } 1442 1443 /* copy values over */ 1444 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 EXTERN_C_END 1448 1449 EXTERN_C_BEGIN 1450 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1451 EXTERN_C_END 1452 1453 EXTERN_C_BEGIN 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatCreate_SeqBAIJ" 1456 int MatCreate_SeqBAIJ(Mat B) 1457 { 1458 int ierr,size; 1459 Mat_SeqBAIJ *b; 1460 1461 PetscFunctionBegin; 1462 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1463 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1464 1465 B->m = B->M = PetscMax(B->m,B->M); 1466 B->n = B->N = PetscMax(B->n,B->N); 1467 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1468 B->data = (void*)b; 1469 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1470 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1471 B->factor = 0; 1472 B->lupivotthreshold = 1.0; 1473 B->mapping = 0; 1474 b->row = 0; 1475 b->col = 0; 1476 b->icol = 0; 1477 b->reallocs = 0; 1478 b->saved_values = 0; 1479 #if defined(PETSC_USE_MAT_SINGLE) 1480 b->setvalueslen = 0; 1481 b->setvaluescopy = PETSC_NULL; 1482 #endif 1483 b->single_precision_solves = PETSC_FALSE; 1484 1485 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1486 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1487 1488 b->sorted = PETSC_FALSE; 1489 b->roworiented = PETSC_TRUE; 1490 b->nonew = 0; 1491 b->diag = 0; 1492 b->solve_work = 0; 1493 b->mult_work = 0; 1494 b->spptr = 0; 1495 B->info.nz_unneeded = (PetscReal)b->maxnz; 1496 b->keepzeroedrows = PETSC_FALSE; 1497 1498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1499 "MatStoreValues_SeqBAIJ", 1500 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1502 "MatRetrieveValues_SeqBAIJ", 1503 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1505 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1506 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1507 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1508 "MatConvert_SeqBAIJ_SeqAIJ", 1509 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 EXTERN_C_END 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1516 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1517 { 1518 Mat C; 1519 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1520 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1521 1522 PetscFunctionBegin; 1523 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1524 1525 *B = 0; 1526 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1527 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1528 c = (Mat_SeqBAIJ*)C->data; 1529 1530 c->bs = a->bs; 1531 c->bs2 = a->bs2; 1532 c->mbs = a->mbs; 1533 c->nbs = a->nbs; 1534 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1535 1536 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1537 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1538 for (i=0; i<mbs; i++) { 1539 c->imax[i] = a->imax[i]; 1540 c->ilen[i] = a->ilen[i]; 1541 } 1542 1543 /* allocate the matrix space */ 1544 c->singlemalloc = PETSC_TRUE; 1545 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1546 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1547 c->j = (int*)(c->a + nz*bs2); 1548 c->i = c->j + nz; 1549 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1550 if (mbs > 0) { 1551 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1552 if (cpvalues == MAT_COPY_VALUES) { 1553 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1554 } else { 1555 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1556 } 1557 } 1558 1559 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1560 c->sorted = a->sorted; 1561 c->roworiented = a->roworiented; 1562 c->nonew = a->nonew; 1563 1564 if (a->diag) { 1565 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1566 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1567 for (i=0; i<mbs; i++) { 1568 c->diag[i] = a->diag[i]; 1569 } 1570 } else c->diag = 0; 1571 c->nz = a->nz; 1572 c->maxnz = a->maxnz; 1573 c->solve_work = 0; 1574 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1575 c->mult_work = 0; 1576 C->preallocated = PETSC_TRUE; 1577 C->assembled = PETSC_TRUE; 1578 *B = C; 1579 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1580 PetscFunctionReturn(0); 1581 } 1582 1583 EXTERN_C_BEGIN 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatLoad_SeqBAIJ" 1586 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1587 { 1588 Mat_SeqBAIJ *a; 1589 Mat B; 1590 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1591 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1592 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1593 int *masked,nmask,tmp,bs2,ishift; 1594 PetscScalar *aa; 1595 MPI_Comm comm = ((PetscObject)viewer)->comm; 1596 1597 PetscFunctionBegin; 1598 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1599 bs2 = bs*bs; 1600 1601 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1602 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1603 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1604 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1605 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1606 M = header[1]; N = header[2]; nz = header[3]; 1607 1608 if (header[3] < 0) { 1609 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1610 } 1611 1612 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1613 1614 /* 1615 This code adds extra rows to make sure the number of rows is 1616 divisible by the blocksize 1617 */ 1618 mbs = M/bs; 1619 extra_rows = bs - M + bs*(mbs); 1620 if (extra_rows == bs) extra_rows = 0; 1621 else mbs++; 1622 if (extra_rows) { 1623 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1624 } 1625 1626 /* read in row lengths */ 1627 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1628 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1629 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1630 1631 /* read in column indices */ 1632 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1633 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1634 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1635 1636 /* loop over row lengths determining block row lengths */ 1637 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1638 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1639 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1640 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1641 masked = mask + mbs; 1642 rowcount = 0; nzcount = 0; 1643 for (i=0; i<mbs; i++) { 1644 nmask = 0; 1645 for (j=0; j<bs; j++) { 1646 kmax = rowlengths[rowcount]; 1647 for (k=0; k<kmax; k++) { 1648 tmp = jj[nzcount++]/bs; 1649 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1650 } 1651 rowcount++; 1652 } 1653 browlengths[i] += nmask; 1654 /* zero out the mask elements we set */ 1655 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1656 } 1657 1658 /* create our matrix */ 1659 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1660 B = *A; 1661 a = (Mat_SeqBAIJ*)B->data; 1662 1663 /* set matrix "i" values */ 1664 a->i[0] = 0; 1665 for (i=1; i<= mbs; i++) { 1666 a->i[i] = a->i[i-1] + browlengths[i-1]; 1667 a->ilen[i-1] = browlengths[i-1]; 1668 } 1669 a->nz = 0; 1670 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1671 1672 /* read in nonzero values */ 1673 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1674 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1675 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1676 1677 /* set "a" and "j" values into matrix */ 1678 nzcount = 0; jcount = 0; 1679 for (i=0; i<mbs; i++) { 1680 nzcountb = nzcount; 1681 nmask = 0; 1682 for (j=0; j<bs; j++) { 1683 kmax = rowlengths[i*bs+j]; 1684 for (k=0; k<kmax; k++) { 1685 tmp = jj[nzcount++]/bs; 1686 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1687 } 1688 } 1689 /* sort the masked values */ 1690 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1691 1692 /* set "j" values into matrix */ 1693 maskcount = 1; 1694 for (j=0; j<nmask; j++) { 1695 a->j[jcount++] = masked[j]; 1696 mask[masked[j]] = maskcount++; 1697 } 1698 /* set "a" values into matrix */ 1699 ishift = bs2*a->i[i]; 1700 for (j=0; j<bs; j++) { 1701 kmax = rowlengths[i*bs+j]; 1702 for (k=0; k<kmax; k++) { 1703 tmp = jj[nzcountb]/bs ; 1704 block = mask[tmp] - 1; 1705 point = jj[nzcountb] - bs*tmp; 1706 idx = ishift + bs2*block + j + bs*point; 1707 a->a[idx] = (MatScalar)aa[nzcountb++]; 1708 } 1709 } 1710 /* zero out the mask elements we set */ 1711 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1712 } 1713 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1714 1715 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1716 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1717 ierr = PetscFree(aa);CHKERRQ(ierr); 1718 ierr = PetscFree(jj);CHKERRQ(ierr); 1719 ierr = PetscFree(mask);CHKERRQ(ierr); 1720 1721 B->assembled = PETSC_TRUE; 1722 1723 ierr = MatView_Private(B);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 EXTERN_C_END 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatCreateSeqBAIJ" 1730 /*@C 1731 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1732 compressed row) format. For good matrix assembly performance the 1733 user should preallocate the matrix storage by setting the parameter nz 1734 (or the array nnz). By setting these parameters accurately, performance 1735 during matrix assembly can be increased by more than a factor of 50. 1736 1737 Collective on MPI_Comm 1738 1739 Input Parameters: 1740 + comm - MPI communicator, set to PETSC_COMM_SELF 1741 . bs - size of block 1742 . m - number of rows 1743 . n - number of columns 1744 . nz - number of nonzero blocks per block row (same for all rows) 1745 - nnz - array containing the number of nonzero blocks in the various block rows 1746 (possibly different for each block row) or PETSC_NULL 1747 1748 Output Parameter: 1749 . A - the matrix 1750 1751 Options Database Keys: 1752 . -mat_no_unroll - uses code that does not unroll the loops in the 1753 block calculations (much slower) 1754 . -mat_block_size - size of the blocks to use 1755 1756 Level: intermediate 1757 1758 Notes: 1759 A nonzero block is any block that as 1 or more nonzeros in it 1760 1761 The block AIJ format is fully compatible with standard Fortran 77 1762 storage. That is, the stored row and column indices can begin at 1763 either one (as in Fortran) or zero. See the users' manual for details. 1764 1765 Specify the preallocated storage with either nz or nnz (not both). 1766 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1767 allocation. For additional details, see the users manual chapter on 1768 matrices. 1769 1770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1771 @*/ 1772 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1773 { 1774 int ierr; 1775 1776 PetscFunctionBegin; 1777 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1778 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1779 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1785 /*@C 1786 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1787 per row in the matrix. For good matrix assembly performance the 1788 user should preallocate the matrix storage by setting the parameter nz 1789 (or the array nnz). By setting these parameters accurately, performance 1790 during matrix assembly can be increased by more than a factor of 50. 1791 1792 Collective on MPI_Comm 1793 1794 Input Parameters: 1795 + A - the matrix 1796 . bs - size of block 1797 . nz - number of block nonzeros per block row (same for all rows) 1798 - nnz - array containing the number of block nonzeros in the various block rows 1799 (possibly different for each block row) or PETSC_NULL 1800 1801 Options Database Keys: 1802 . -mat_no_unroll - uses code that does not unroll the loops in the 1803 block calculations (much slower) 1804 . -mat_block_size - size of the blocks to use 1805 1806 Level: intermediate 1807 1808 Notes: 1809 The block AIJ format is fully compatible with standard Fortran 77 1810 storage. That is, the stored row and column indices can begin at 1811 either one (as in Fortran) or zero. See the users' manual for details. 1812 1813 Specify the preallocated storage with either nz or nnz (not both). 1814 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1815 allocation. For additional details, see the users manual chapter on 1816 matrices. 1817 1818 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1819 @*/ 1820 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1821 { 1822 Mat_SeqBAIJ *b; 1823 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1824 PetscTruth flg; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1828 if (!flg) PetscFunctionReturn(0); 1829 1830 B->preallocated = PETSC_TRUE; 1831 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1832 if (nnz && newbs != bs) { 1833 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1834 } 1835 bs = newbs; 1836 1837 mbs = B->m/bs; 1838 nbs = B->n/bs; 1839 bs2 = bs*bs; 1840 1841 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1842 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1843 } 1844 1845 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1846 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1847 if (nnz) { 1848 for (i=0; i<mbs; i++) { 1849 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1850 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1851 } 1852 } 1853 1854 b = (Mat_SeqBAIJ*)B->data; 1855 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1856 B->ops->solve = MatSolve_SeqBAIJ_Update; 1857 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1858 if (!flg) { 1859 switch (bs) { 1860 case 1: 1861 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1862 B->ops->mult = MatMult_SeqBAIJ_1; 1863 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1864 break; 1865 case 2: 1866 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1867 B->ops->mult = MatMult_SeqBAIJ_2; 1868 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1869 break; 1870 case 3: 1871 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1872 B->ops->mult = MatMult_SeqBAIJ_3; 1873 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1874 break; 1875 case 4: 1876 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1877 B->ops->mult = MatMult_SeqBAIJ_4; 1878 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1879 break; 1880 case 5: 1881 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1882 B->ops->mult = MatMult_SeqBAIJ_5; 1883 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1884 break; 1885 case 6: 1886 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1887 B->ops->mult = MatMult_SeqBAIJ_6; 1888 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1889 break; 1890 case 7: 1891 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1892 B->ops->mult = MatMult_SeqBAIJ_7; 1893 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1894 break; 1895 default: 1896 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1897 B->ops->mult = MatMult_SeqBAIJ_N; 1898 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1899 break; 1900 } 1901 } 1902 b->bs = bs; 1903 b->mbs = mbs; 1904 b->nbs = nbs; 1905 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1906 if (!nnz) { 1907 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1908 else if (nz <= 0) nz = 1; 1909 for (i=0; i<mbs; i++) b->imax[i] = nz; 1910 nz = nz*mbs; 1911 } else { 1912 nz = 0; 1913 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1914 } 1915 1916 /* allocate the matrix space */ 1917 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1918 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1919 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1920 b->j = (int*)(b->a + nz*bs2); 1921 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1922 b->i = b->j + nz; 1923 b->singlemalloc = PETSC_TRUE; 1924 1925 b->i[0] = 0; 1926 for (i=1; i<mbs+1; i++) { 1927 b->i[i] = b->i[i-1] + b->imax[i-1]; 1928 } 1929 1930 /* b->ilen will count nonzeros in each block row so far. */ 1931 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1932 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1933 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1934 1935 b->bs = bs; 1936 b->bs2 = bs2; 1937 b->mbs = mbs; 1938 b->nz = 0; 1939 b->maxnz = nz*bs2; 1940 B->info.nz_unneeded = (PetscReal)b->maxnz; 1941 PetscFunctionReturn(0); 1942 } 1943 1944