1 /*$Id: baij.c,v 1.231 2001/07/11 04:27:45 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 #if defined(PETSC_USE_MAT_SINGLE) 216 if (a->bs==4) { 217 PetscTruth sse_enabled; 218 int ierr; 219 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 220 if (sse_enabled) { 221 a->singleprecisionsolves = PETSC_TRUE; 222 } else { 223 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 224 } 225 } else { 226 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 227 } 228 #else 229 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 230 #endif 231 break; 232 default: 233 SETERRQ(PETSC_ERR_SUP,"unknown option"); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 241 { 242 PetscFunctionBegin; 243 if (m) *m = 0; 244 if (n) *n = A->m; 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatGetRow_SeqBAIJ" 250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 251 { 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 253 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 254 MatScalar *aa,*aa_i; 255 Scalar *v_i; 256 257 PetscFunctionBegin; 258 bs = a->bs; 259 ai = a->i; 260 aj = a->j; 261 aa = a->a; 262 bs2 = a->bs2; 263 264 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 265 266 bn = row/bs; /* Block number */ 267 bp = row % bs; /* Block Position */ 268 M = ai[bn+1] - ai[bn]; 269 *nz = bs*M; 270 271 if (v) { 272 *v = 0; 273 if (*nz) { 274 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 275 for (i=0; i<M; i++) { /* for each block in the block row */ 276 v_i = *v + i*bs; 277 aa_i = aa + bs2*(ai[bn] + i); 278 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 279 } 280 } 281 } 282 283 if (idx) { 284 *idx = 0; 285 if (*nz) { 286 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 287 for (i=0; i<M; i++) { /* for each block in the block row */ 288 idx_i = *idx + i*bs; 289 itmp = bs*aj[ai[bn] + i]; 290 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 291 } 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 300 { 301 int ierr; 302 303 PetscFunctionBegin; 304 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 305 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatTranspose_SeqBAIJ" 311 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 312 { 313 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 314 Mat C; 315 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 316 int *rows,*cols,bs2=a->bs2; 317 Scalar *array; 318 319 PetscFunctionBegin; 320 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 321 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 322 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 323 324 #if defined(PETSC_USE_MAT_SINGLE) 325 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 326 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 327 #else 328 array = a->a; 329 #endif 330 331 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 332 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 333 ierr = PetscFree(col);CHKERRQ(ierr); 334 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 335 cols = rows + bs; 336 for (i=0; i<mbs; i++) { 337 cols[0] = i*bs; 338 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 339 len = ai[i+1] - ai[i]; 340 for (j=0; j<len; j++) { 341 rows[0] = (*aj++)*bs; 342 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 343 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 344 array += bs2; 345 } 346 } 347 ierr = PetscFree(rows);CHKERRQ(ierr); 348 #if defined(PETSC_USE_MAT_SINGLE) 349 ierr = PetscFree(array); 350 #endif 351 352 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 355 if (B) { 356 *B = C; 357 } else { 358 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 359 } 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 366 { 367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 368 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 369 Scalar *aa; 370 FILE *file; 371 372 PetscFunctionBegin; 373 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 374 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 375 col_lens[0] = MAT_COOKIE; 376 377 col_lens[1] = A->m; 378 col_lens[2] = A->n; 379 col_lens[3] = a->nz*bs2; 380 381 /* store lengths of each row and write (including header) to file */ 382 count = 0; 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 386 } 387 } 388 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 389 ierr = PetscFree(col_lens);CHKERRQ(ierr); 390 391 /* store column indices (zero start index) */ 392 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 393 count = 0; 394 for (i=0; i<a->mbs; i++) { 395 for (j=0; j<bs; j++) { 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 jj[count++] = bs*a->j[k] + l; 399 } 400 } 401 } 402 } 403 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 404 ierr = PetscFree(jj);CHKERRQ(ierr); 405 406 /* store nonzero values */ 407 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 408 count = 0; 409 for (i=0; i<a->mbs; i++) { 410 for (j=0; j<bs; j++) { 411 for (k=a->i[i]; k<a->i[i+1]; k++) { 412 for (l=0; l<bs; l++) { 413 aa[count++] = a->a[bs2*k + l*bs + j]; 414 } 415 } 416 } 417 } 418 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 419 ierr = PetscFree(aa);CHKERRQ(ierr); 420 421 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 422 if (file) { 423 fprintf(file,"-matload_block_size %d\n",a->bs); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 442 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 443 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNCT__ 502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 PetscViewer viewer; 511 512 PetscFunctionBegin; 513 514 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 515 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 516 517 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 518 519 /* loop over matrix elements drawing boxes */ 520 color = PETSC_DRAW_BLUE; 521 for (i=0,row=0; i<mbs; i++,row+=bs) { 522 for (j=a->i[i]; j<a->i[i+1]; j++) { 523 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 524 x_l = a->j[j]*bs; x_r = x_l + 1.0; 525 aa = a->a + j*bs2; 526 for (k=0; k<bs; k++) { 527 for (l=0; l<bs; l++) { 528 if (PetscRealPart(*aa++) >= 0.) continue; 529 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 color = PETSC_DRAW_CYAN; 535 for (i=0,row=0; i<mbs; i++,row+=bs) { 536 for (j=a->i[i]; j<a->i[i+1]; j++) { 537 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for (k=0; k<bs; k++) { 541 for (l=0; l<bs; l++) { 542 if (PetscRealPart(*aa++) != 0.) continue; 543 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 549 color = PETSC_DRAW_RED; 550 for (i=0,row=0; i<mbs; i++,row+=bs) { 551 for (j=a->i[i]; j<a->i[i+1]; j++) { 552 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 553 x_l = a->j[j]*bs; x_r = x_l + 1.0; 554 aa = a->a + j*bs2; 555 for (k=0; k<bs; k++) { 556 for (l=0; l<bs; l++) { 557 if (PetscRealPart(*aa++) <= 0.) continue; 558 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 569 { 570 int ierr; 571 PetscReal xl,yl,xr,yr,w,h; 572 PetscDraw draw; 573 PetscTruth isnull; 574 575 PetscFunctionBegin; 576 577 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 578 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 579 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 581 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 582 xr += w; yr += h; xl = -w; yl = -h; 583 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 584 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatView_SeqBAIJ" 591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 592 { 593 int ierr; 594 PetscTruth issocket,isascii,isbinary,isdraw; 595 596 PetscFunctionBegin; 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 601 if (issocket) { 602 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 603 } else if (isascii){ 604 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 605 } else if (isbinary) { 606 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 607 } else if (isdraw) { 608 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 609 } else { 610 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatGetValues_SeqBAIJ" 618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 619 { 620 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 621 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 622 int *ai = a->i,*ailen = a->ilen; 623 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 624 MatScalar *ap,*aa = a->a,zero = 0.0; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over rows */ 628 row = im[k]; brow = row/bs; 629 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 630 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 631 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 632 nrow = ailen[brow]; 633 for (l=0; l<n; l++) { /* loop over columns */ 634 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 635 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 636 col = in[l] ; 637 bcol = col/bs; 638 cidx = col%bs; 639 ridx = row%bs; 640 high = nrow; 641 low = 0; /* assume unsorted */ 642 while (high-low > 5) { 643 t = (low+high)/2; 644 if (rp[t] > bcol) high = t; 645 else low = t; 646 } 647 for (i=low; i<high; i++) { 648 if (rp[i] > bcol) break; 649 if (rp[i] == bcol) { 650 *v++ = ap[bs2*i+bs*cidx+ridx]; 651 goto finished; 652 } 653 } 654 *v++ = zero; 655 finished:; 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #if defined(PETSC_USE_MAT_SINGLE) 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 665 { 666 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 667 int ierr,i,N = m*n*b->bs2; 668 MatScalar *vsingle; 669 670 PetscFunctionBegin; 671 if (N > b->setvalueslen) { 672 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 673 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 674 b->setvalueslen = N; 675 } 676 vsingle = b->setvaluescopy; 677 for (i=0; i<N; i++) { 678 vsingle[i] = v[i]; 679 } 680 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 #endif 684 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 692 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 693 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 694 PetscTruth roworiented=a->roworiented; 695 MatScalar *value = v,*ap,*aa = a->a,*bap; 696 697 PetscFunctionBegin; 698 if (roworiented) { 699 stepval = (n-1)*bs; 700 } else { 701 stepval = (m-1)*bs; 702 } 703 for (k=0; k<m; k++) { /* loop over added rows */ 704 row = im[k]; 705 if (row < 0) continue; 706 #if defined(PETSC_USE_BOPT_g) 707 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 708 #endif 709 rp = aj + ai[row]; 710 ap = aa + bs2*ai[row]; 711 rmax = imax[row]; 712 nrow = ailen[row]; 713 low = 0; 714 for (l=0; l<n; l++) { /* loop over added columns */ 715 if (in[l] < 0) continue; 716 #if defined(PETSC_USE_BOPT_g) 717 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 718 #endif 719 col = in[l]; 720 if (roworiented) { 721 value = v + k*(stepval+bs)*bs + l*bs; 722 } else { 723 value = v + l*(stepval+bs)*bs + k*bs; 724 } 725 if (!sorted) low = 0; high = nrow; 726 while (high-low > 7) { 727 t = (low+high)/2; 728 if (rp[t] > col) high = t; 729 else low = t; 730 } 731 for (i=low; i<high; i++) { 732 if (rp[i] > col) break; 733 if (rp[i] == col) { 734 bap = ap + bs2*i; 735 if (roworiented) { 736 if (is == ADD_VALUES) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] += *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] = *value++; 746 } 747 } 748 } 749 } else { 750 if (is == ADD_VALUES) { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=0; jj<bs; jj++) { 753 *bap++ += *value++; 754 } 755 } 756 } else { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ = *value++; 760 } 761 } 762 } 763 } 764 goto noinsert2; 765 } 766 } 767 if (nonew == 1) goto noinsert2; 768 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 769 if (nrow >= rmax) { 770 /* there is no extra room in row, therefore enlarge */ 771 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 772 MatScalar *new_a; 773 774 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 776 /* malloc new storage space */ 777 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 778 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 779 new_j = (int*)(new_a + bs2*new_nz); 780 new_i = new_j + new_nz; 781 782 /* copy over old data into new slots */ 783 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 784 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 785 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 786 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 787 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 788 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 790 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 791 /* free up old matrix storage */ 792 ierr = PetscFree(a->a);CHKERRQ(ierr); 793 if (!a->singlemalloc) { 794 ierr = PetscFree(a->i);CHKERRQ(ierr); 795 ierr = PetscFree(a->j);CHKERRQ(ierr); 796 } 797 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 798 a->singlemalloc = PETSC_TRUE; 799 800 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 801 rmax = imax[row] = imax[row] + CHUNKSIZE; 802 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 803 a->maxnz += bs2*CHUNKSIZE; 804 a->reallocs++; 805 a->nz++; 806 } 807 N = nrow++ - 1; 808 /* shift up all the later entries in this row */ 809 for (ii=N; ii>=i; ii--) { 810 rp[ii+1] = rp[ii]; 811 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 812 } 813 if (N >= i) { 814 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 815 } 816 rp[i] = col; 817 bap = ap + bs2*i; 818 if (roworiented) { 819 for (ii=0; ii<bs; ii++,value+=stepval) { 820 for (jj=ii; jj<bs2; jj+=bs) { 821 bap[jj] = *value++; 822 } 823 } 824 } else { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=0; jj<bs; jj++) { 827 *bap++ = *value++; 828 } 829 } 830 } 831 noinsert2:; 832 low = i; 833 } 834 ailen[row] = nrow; 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 845 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 846 int m = A->m,*ip,N,*ailen = a->ilen; 847 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 848 MatScalar *aa = a->a,*ap; 849 850 PetscFunctionBegin; 851 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 852 853 if (m) rmax = ailen[0]; 854 for (i=1; i<mbs; i++) { 855 /* move each row back by the amount of empty slots (fshift) before it*/ 856 fshift += imax[i-1] - ailen[i-1]; 857 rmax = PetscMax(rmax,ailen[i]); 858 if (fshift) { 859 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 860 N = ailen[i]; 861 for (j=0; j<N; j++) { 862 ip[j-fshift] = ip[j]; 863 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 864 } 865 } 866 ai[i] = ai[i-1] + ailen[i-1]; 867 } 868 if (mbs) { 869 fshift += imax[mbs-1] - ailen[mbs-1]; 870 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 871 } 872 /* reset ilen and imax for each row */ 873 for (i=0; i<mbs; i++) { 874 ailen[i] = imax[i] = ai[i+1] - ai[i]; 875 } 876 a->nz = ai[mbs]; 877 878 /* diagonals may have moved, so kill the diagonal pointers */ 879 if (fshift && a->diag) { 880 ierr = PetscFree(a->diag);CHKERRQ(ierr); 881 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 882 a->diag = 0; 883 } 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 885 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 886 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 887 a->reallocs); 888 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 889 a->reallocs = 0; 890 A->info.nz_unneeded = (PetscReal)fshift*bs2; 891 892 PetscFunctionReturn(0); 893 } 894 895 896 897 /* 898 This function returns an array of flags which indicate the locations of contiguous 899 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 900 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 901 Assume: sizes should be long enough to hold all the values. 902 */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 906 { 907 int i,j,k,row; 908 PetscTruth flg; 909 910 PetscFunctionBegin; 911 for (i=0,j=0; i<n; j++) { 912 row = idx[i]; 913 if (row%bs!=0) { /* Not the begining of a block */ 914 sizes[j] = 1; 915 i++; 916 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 917 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 918 i++; 919 } else { /* Begining of the block, so check if the complete block exists */ 920 flg = PETSC_TRUE; 921 for (k=1; k<bs; k++) { 922 if (row+k != idx[i+k]) { /* break in the block */ 923 flg = PETSC_FALSE; 924 break; 925 } 926 } 927 if (flg == PETSC_TRUE) { /* No break in the bs */ 928 sizes[j] = bs; 929 i+= bs; 930 } else { 931 sizes[j] = 1; 932 i++; 933 } 934 } 935 } 936 *bs_max = j; 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 943 { 944 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 945 int ierr,i,j,k,count,is_n,*is_idx,*rows; 946 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 947 Scalar zero = 0.0; 948 MatScalar *aa; 949 950 PetscFunctionBegin; 951 /* Make a copy of the IS and sort it */ 952 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 953 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 954 955 /* allocate memory for rows,sizes */ 956 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 957 sizes = rows + is_n; 958 959 /* copy IS values to rows, and sort them */ 960 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 961 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 962 if (baij->keepzeroedrows) { 963 for (i=0; i<is_n; i++) { sizes[i] = 1; } 964 bs_max = is_n; 965 } else { 966 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 967 } 968 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 969 970 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 971 row = rows[j]; 972 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 973 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 974 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 975 if (sizes[i] == bs && !baij->keepzeroedrows) { 976 if (diag) { 977 if (baij->ilen[row/bs] > 0) { 978 baij->ilen[row/bs] = 1; 979 baij->j[baij->i[row/bs]] = row/bs; 980 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 981 } 982 /* Now insert all the diagonal values for this bs */ 983 for (k=0; k<bs; k++) { 984 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 985 } 986 } else { /* (!diag) */ 987 baij->ilen[row/bs] = 0; 988 } /* end (!diag) */ 989 } else { /* (sizes[i] != bs) */ 990 #if defined (PETSC_USE_DEBUG) 991 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 992 #endif 993 for (k=0; k<count; k++) { 994 aa[0] = zero; 995 aa += bs; 996 } 997 if (diag) { 998 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 999 } 1000 } 1001 } 1002 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1011 { 1012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1013 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1014 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1015 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1016 int ridx,cidx,bs2=a->bs2,ierr; 1017 PetscTruth roworiented=a->roworiented; 1018 MatScalar *ap,value,*aa=a->a,*bap; 1019 1020 PetscFunctionBegin; 1021 for (k=0; k<m; k++) { /* loop over added rows */ 1022 row = im[k]; brow = row/bs; 1023 if (row < 0) continue; 1024 #if defined(PETSC_USE_BOPT_g) 1025 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1026 #endif 1027 rp = aj + ai[brow]; 1028 ap = aa + bs2*ai[brow]; 1029 rmax = imax[brow]; 1030 nrow = ailen[brow]; 1031 low = 0; 1032 for (l=0; l<n; l++) { /* loop over added columns */ 1033 if (in[l] < 0) continue; 1034 #if defined(PETSC_USE_BOPT_g) 1035 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1036 #endif 1037 col = in[l]; bcol = col/bs; 1038 ridx = row % bs; cidx = col % bs; 1039 if (roworiented) { 1040 value = v[l + k*n]; 1041 } else { 1042 value = v[k + l*m]; 1043 } 1044 if (!sorted) low = 0; high = nrow; 1045 while (high-low > 7) { 1046 t = (low+high)/2; 1047 if (rp[t] > bcol) high = t; 1048 else low = t; 1049 } 1050 for (i=low; i<high; i++) { 1051 if (rp[i] > bcol) break; 1052 if (rp[i] == bcol) { 1053 bap = ap + bs2*i + bs*cidx + ridx; 1054 if (is == ADD_VALUES) *bap += value; 1055 else *bap = value; 1056 goto noinsert1; 1057 } 1058 } 1059 if (nonew == 1) goto noinsert1; 1060 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 if (nrow >= rmax) { 1062 /* there is no extra room in row, therefore enlarge */ 1063 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1064 MatScalar *new_a; 1065 1066 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1067 1068 /* Malloc new storage space */ 1069 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1070 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1071 new_j = (int*)(new_a + bs2*new_nz); 1072 new_i = new_j + new_nz; 1073 1074 /* copy over old data into new slots */ 1075 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1076 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1077 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1078 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1079 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1080 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1082 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1083 /* free up old matrix storage */ 1084 ierr = PetscFree(a->a);CHKERRQ(ierr); 1085 if (!a->singlemalloc) { 1086 ierr = PetscFree(a->i);CHKERRQ(ierr); 1087 ierr = PetscFree(a->j);CHKERRQ(ierr); 1088 } 1089 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1090 a->singlemalloc = PETSC_TRUE; 1091 1092 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1093 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1094 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1095 a->maxnz += bs2*CHUNKSIZE; 1096 a->reallocs++; 1097 a->nz++; 1098 } 1099 N = nrow++ - 1; 1100 /* shift up all the later entries in this row */ 1101 for (ii=N; ii>=i; ii--) { 1102 rp[ii+1] = rp[ii]; 1103 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 if (N>=i) { 1106 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1107 } 1108 rp[i] = bcol; 1109 ap[bs2*i + bs*cidx + ridx] = value; 1110 noinsert1:; 1111 low = i; 1112 } 1113 ailen[brow] = nrow; 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*); 1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*); 1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat); 1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat); 1132 1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1148 1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1156 1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1165 1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1178 { 1179 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1180 Mat outA; 1181 int ierr; 1182 PetscTruth row_identity,col_identity; 1183 1184 PetscFunctionBegin; 1185 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1186 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1187 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1188 if (!row_identity || !col_identity) { 1189 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1190 } 1191 1192 outA = inA; 1193 inA->factor = FACTOR_LU; 1194 1195 if (!a->diag) { 1196 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1197 } 1198 /* 1199 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1200 for ILU(0) factorization with natural ordering 1201 */ 1202 switch (a->bs) { 1203 case 1: 1204 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1205 inA->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 1206 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 1207 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1208 case 2: 1209 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1210 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1211 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1212 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1213 break; 1214 case 3: 1215 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1216 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1217 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1218 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1219 break; 1220 case 4: 1221 #if defined(PETSC_HAVE_SSE) 1222 { 1223 PetscTruth sse_enabled,single_prec,flg; 1224 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 1225 if (sse_enabled) { 1226 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 1227 /* What should this prefix be mat_solve, pc_solve, check both? */ 1228 ierr = PetscOptionsGetLogical(PETSC_NULL,"-single_precision",&single_prec,&flg);CHKERRQ(ierr); 1229 if (flg) { 1230 if (single_prec) { 1231 a->singleprecisionsolves = PETSC_TRUE; 1232 } else { 1233 a->singleprecisionsolves = PETSC_FALSE; 1234 } 1235 } 1236 if (a->singleprecisionsolves) { 1237 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion; 1238 } else { 1239 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1240 } 1241 } else { 1242 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1243 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1244 } 1245 } 1246 #else 1247 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1248 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1249 #endif 1250 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1251 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1252 break; 1253 case 5: 1254 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1255 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1256 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1257 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1258 break; 1259 case 6: 1260 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1261 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1262 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1263 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1264 break; 1265 case 7: 1266 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1267 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1268 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1269 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1270 break; 1271 default: 1272 a->row = row; 1273 a->col = col; 1274 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1275 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1276 1277 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1278 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1279 PetscLogObjectParent(inA,a->icol); 1280 1281 if (!a->solve_work) { 1282 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1283 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1284 } 1285 } 1286 1287 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1288 1289 PetscFunctionReturn(0); 1290 } 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1293 int MatPrintHelp_SeqBAIJ(Mat A) 1294 { 1295 static PetscTruth called = PETSC_FALSE; 1296 MPI_Comm comm = A->comm; 1297 int ierr; 1298 1299 PetscFunctionBegin; 1300 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1301 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1302 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1303 PetscFunctionReturn(0); 1304 } 1305 1306 EXTERN_C_BEGIN 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1309 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1310 { 1311 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1312 int i,nz,n; 1313 1314 PetscFunctionBegin; 1315 nz = baij->maxnz; 1316 n = mat->n; 1317 for (i=0; i<nz; i++) { 1318 baij->j[i] = indices[i]; 1319 } 1320 baij->nz = nz; 1321 for (i=0; i<n; i++) { 1322 baij->ilen[i] = baij->imax[i]; 1323 } 1324 1325 PetscFunctionReturn(0); 1326 } 1327 EXTERN_C_END 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1331 /*@ 1332 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1333 in the matrix. 1334 1335 Input Parameters: 1336 + mat - the SeqBAIJ matrix 1337 - indices - the column indices 1338 1339 Level: advanced 1340 1341 Notes: 1342 This can be called if you have precomputed the nonzero structure of the 1343 matrix and want to provide it to the matrix object to improve the performance 1344 of the MatSetValues() operation. 1345 1346 You MUST have set the correct numbers of nonzeros per row in the call to 1347 MatCreateSeqBAIJ(). 1348 1349 MUST be called before any calls to MatSetValues(); 1350 1351 @*/ 1352 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1353 { 1354 int ierr,(*f)(Mat,int *); 1355 1356 PetscFunctionBegin; 1357 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1358 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1359 if (f) { 1360 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1361 } else { 1362 SETERRQ(1,"Wrong type of matrix to set column indices"); 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1369 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1370 { 1371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1372 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1373 PetscReal atmp; 1374 Scalar *x,zero = 0.0; 1375 MatScalar *aa; 1376 int ncols,brow,krow,kcol; 1377 1378 PetscFunctionBegin; 1379 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1380 bs = a->bs; 1381 aa = a->a; 1382 ai = a->i; 1383 aj = a->j; 1384 mbs = a->mbs; 1385 1386 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1387 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1388 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1389 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1390 for (i=0; i<mbs; i++) { 1391 ncols = ai[1] - ai[0]; ai++; 1392 brow = bs*i; 1393 for (j=0; j<ncols; j++){ 1394 /* bcol = bs*(*aj); */ 1395 for (kcol=0; kcol<bs; kcol++){ 1396 for (krow=0; krow<bs; krow++){ 1397 atmp = PetscAbsScalar(*aa); aa++; 1398 row = brow + krow; /* row index */ 1399 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1400 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1401 } 1402 } 1403 aj++; 1404 } 1405 } 1406 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1412 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1413 { 1414 int ierr; 1415 1416 PetscFunctionBegin; 1417 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1423 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1424 { 1425 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1426 PetscFunctionBegin; 1427 *array = a->a; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1433 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1434 { 1435 PetscFunctionBegin; 1436 PetscFunctionReturn(0); 1437 } 1438 1439 /* -------------------------------------------------------------------*/ 1440 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1441 MatGetRow_SeqBAIJ, 1442 MatRestoreRow_SeqBAIJ, 1443 MatMult_SeqBAIJ_N, 1444 MatMultAdd_SeqBAIJ_N, 1445 MatMultTranspose_SeqBAIJ, 1446 MatMultTransposeAdd_SeqBAIJ, 1447 MatSolve_SeqBAIJ_N, 1448 0, 1449 0, 1450 0, 1451 MatLUFactor_SeqBAIJ, 1452 0, 1453 0, 1454 MatTranspose_SeqBAIJ, 1455 MatGetInfo_SeqBAIJ, 1456 MatEqual_SeqBAIJ, 1457 MatGetDiagonal_SeqBAIJ, 1458 MatDiagonalScale_SeqBAIJ, 1459 MatNorm_SeqBAIJ, 1460 0, 1461 MatAssemblyEnd_SeqBAIJ, 1462 0, 1463 MatSetOption_SeqBAIJ, 1464 MatZeroEntries_SeqBAIJ, 1465 MatZeroRows_SeqBAIJ, 1466 MatLUFactorSymbolic_SeqBAIJ, 1467 MatLUFactorNumeric_SeqBAIJ_N, 1468 0, 1469 0, 1470 MatSetUpPreallocation_SeqBAIJ, 1471 0, 1472 MatGetOwnershipRange_SeqBAIJ, 1473 MatILUFactorSymbolic_SeqBAIJ, 1474 0, 1475 MatGetArray_SeqBAIJ, 1476 MatRestoreArray_SeqBAIJ, 1477 MatDuplicate_SeqBAIJ, 1478 0, 1479 0, 1480 MatILUFactor_SeqBAIJ, 1481 0, 1482 0, 1483 MatGetSubMatrices_SeqBAIJ, 1484 MatIncreaseOverlap_SeqBAIJ, 1485 MatGetValues_SeqBAIJ, 1486 0, 1487 MatPrintHelp_SeqBAIJ, 1488 MatScale_SeqBAIJ, 1489 0, 1490 0, 1491 0, 1492 MatGetBlockSize_SeqBAIJ, 1493 MatGetRowIJ_SeqBAIJ, 1494 MatRestoreRowIJ_SeqBAIJ, 1495 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 MatSetValuesBlocked_SeqBAIJ, 1502 MatGetSubMatrix_SeqBAIJ, 1503 MatDestroy_SeqBAIJ, 1504 MatView_SeqBAIJ, 1505 MatGetMaps_Petsc, 1506 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 0, 1512 MatGetRowMax_SeqBAIJ, 1513 MatConvert_Basic}; 1514 1515 EXTERN_C_BEGIN 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1518 int MatStoreValues_SeqBAIJ(Mat mat) 1519 { 1520 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1521 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1522 int ierr; 1523 1524 PetscFunctionBegin; 1525 if (aij->nonew != 1) { 1526 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1527 } 1528 1529 /* allocate space for values if not already there */ 1530 if (!aij->saved_values) { 1531 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1532 } 1533 1534 /* copy values over */ 1535 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 EXTERN_C_END 1539 1540 EXTERN_C_BEGIN 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1543 int MatRetrieveValues_SeqBAIJ(Mat mat) 1544 { 1545 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1546 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1547 1548 PetscFunctionBegin; 1549 if (aij->nonew != 1) { 1550 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1551 } 1552 if (!aij->saved_values) { 1553 SETERRQ(1,"Must call MatStoreValues(A);first"); 1554 } 1555 1556 /* copy values over */ 1557 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 EXTERN_C_END 1561 1562 EXTERN_C_BEGIN 1563 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1564 EXTERN_C_END 1565 1566 EXTERN_C_BEGIN 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatCreate_SeqBAIJ" 1569 int MatCreate_SeqBAIJ(Mat B) 1570 { 1571 int ierr,size; 1572 Mat_SeqBAIJ *b; 1573 1574 PetscFunctionBegin; 1575 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1576 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1577 1578 B->m = B->M = PetscMax(B->m,B->M); 1579 B->n = B->N = PetscMax(B->n,B->N); 1580 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1581 B->data = (void*)b; 1582 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1583 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1584 B->factor = 0; 1585 B->lupivotthreshold = 1.0; 1586 B->mapping = 0; 1587 b->row = 0; 1588 b->col = 0; 1589 b->icol = 0; 1590 b->reallocs = 0; 1591 b->saved_values = 0; 1592 #if defined(PETSC_USE_MAT_SINGLE) 1593 b->setvalueslen = 0; 1594 b->setvaluescopy = PETSC_NULL; 1595 #endif 1596 1597 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1598 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1599 1600 b->sorted = PETSC_FALSE; 1601 b->roworiented = PETSC_TRUE; 1602 b->nonew = 0; 1603 b->diag = 0; 1604 b->solve_work = 0; 1605 b->mult_work = 0; 1606 b->spptr = 0; 1607 B->info.nz_unneeded = (PetscReal)b->maxnz; 1608 b->keepzeroedrows = PETSC_FALSE; 1609 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1611 "MatStoreValues_SeqBAIJ", 1612 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1614 "MatRetrieveValues_SeqBAIJ", 1615 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1617 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1618 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1620 "MatConvert_SeqBAIJ_SeqAIJ", 1621 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1622 PetscFunctionReturn(0); 1623 } 1624 EXTERN_C_END 1625 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1628 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1629 { 1630 Mat C; 1631 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1632 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1633 1634 PetscFunctionBegin; 1635 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1636 1637 *B = 0; 1638 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1639 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1640 c = (Mat_SeqBAIJ*)C->data; 1641 1642 c->bs = a->bs; 1643 c->bs2 = a->bs2; 1644 c->mbs = a->mbs; 1645 c->nbs = a->nbs; 1646 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1647 1648 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1649 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1650 for (i=0; i<mbs; i++) { 1651 c->imax[i] = a->imax[i]; 1652 c->ilen[i] = a->ilen[i]; 1653 } 1654 1655 /* allocate the matrix space */ 1656 c->singlemalloc = PETSC_TRUE; 1657 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1658 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1659 c->j = (int*)(c->a + nz*bs2); 1660 c->i = c->j + nz; 1661 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1662 if (mbs > 0) { 1663 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1664 if (cpvalues == MAT_COPY_VALUES) { 1665 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1666 } else { 1667 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1668 } 1669 } 1670 1671 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1672 c->sorted = a->sorted; 1673 c->roworiented = a->roworiented; 1674 c->nonew = a->nonew; 1675 1676 if (a->diag) { 1677 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1678 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1679 for (i=0; i<mbs; i++) { 1680 c->diag[i] = a->diag[i]; 1681 } 1682 } else c->diag = 0; 1683 c->nz = a->nz; 1684 c->maxnz = a->maxnz; 1685 c->solve_work = 0; 1686 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1687 c->mult_work = 0; 1688 C->preallocated = PETSC_TRUE; 1689 C->assembled = PETSC_TRUE; 1690 *B = C; 1691 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 1695 EXTERN_C_BEGIN 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatLoad_SeqBAIJ" 1698 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1699 { 1700 Mat_SeqBAIJ *a; 1701 Mat B; 1702 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1703 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1704 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1705 int *masked,nmask,tmp,bs2,ishift; 1706 Scalar *aa; 1707 MPI_Comm comm = ((PetscObject)viewer)->comm; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1711 bs2 = bs*bs; 1712 1713 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1714 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1715 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1716 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1717 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1718 M = header[1]; N = header[2]; nz = header[3]; 1719 1720 if (header[3] < 0) { 1721 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1722 } 1723 1724 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1725 1726 /* 1727 This code adds extra rows to make sure the number of rows is 1728 divisible by the blocksize 1729 */ 1730 mbs = M/bs; 1731 extra_rows = bs - M + bs*(mbs); 1732 if (extra_rows == bs) extra_rows = 0; 1733 else mbs++; 1734 if (extra_rows) { 1735 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1736 } 1737 1738 /* read in row lengths */ 1739 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1740 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1741 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1742 1743 /* read in column indices */ 1744 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1745 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1746 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1747 1748 /* loop over row lengths determining block row lengths */ 1749 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1750 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1751 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1752 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1753 masked = mask + mbs; 1754 rowcount = 0; nzcount = 0; 1755 for (i=0; i<mbs; i++) { 1756 nmask = 0; 1757 for (j=0; j<bs; j++) { 1758 kmax = rowlengths[rowcount]; 1759 for (k=0; k<kmax; k++) { 1760 tmp = jj[nzcount++]/bs; 1761 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1762 } 1763 rowcount++; 1764 } 1765 browlengths[i] += nmask; 1766 /* zero out the mask elements we set */ 1767 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1768 } 1769 1770 /* create our matrix */ 1771 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1772 B = *A; 1773 a = (Mat_SeqBAIJ*)B->data; 1774 1775 /* set matrix "i" values */ 1776 a->i[0] = 0; 1777 for (i=1; i<= mbs; i++) { 1778 a->i[i] = a->i[i-1] + browlengths[i-1]; 1779 a->ilen[i-1] = browlengths[i-1]; 1780 } 1781 a->nz = 0; 1782 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1783 1784 /* read in nonzero values */ 1785 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1786 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1787 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1788 1789 /* set "a" and "j" values into matrix */ 1790 nzcount = 0; jcount = 0; 1791 for (i=0; i<mbs; i++) { 1792 nzcountb = nzcount; 1793 nmask = 0; 1794 for (j=0; j<bs; j++) { 1795 kmax = rowlengths[i*bs+j]; 1796 for (k=0; k<kmax; k++) { 1797 tmp = jj[nzcount++]/bs; 1798 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1799 } 1800 } 1801 /* sort the masked values */ 1802 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1803 1804 /* set "j" values into matrix */ 1805 maskcount = 1; 1806 for (j=0; j<nmask; j++) { 1807 a->j[jcount++] = masked[j]; 1808 mask[masked[j]] = maskcount++; 1809 } 1810 /* set "a" values into matrix */ 1811 ishift = bs2*a->i[i]; 1812 for (j=0; j<bs; j++) { 1813 kmax = rowlengths[i*bs+j]; 1814 for (k=0; k<kmax; k++) { 1815 tmp = jj[nzcountb]/bs ; 1816 block = mask[tmp] - 1; 1817 point = jj[nzcountb] - bs*tmp; 1818 idx = ishift + bs2*block + j + bs*point; 1819 a->a[idx] = (MatScalar)aa[nzcountb++]; 1820 } 1821 } 1822 /* zero out the mask elements we set */ 1823 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1824 } 1825 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1826 1827 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1828 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1829 ierr = PetscFree(aa);CHKERRQ(ierr); 1830 ierr = PetscFree(jj);CHKERRQ(ierr); 1831 ierr = PetscFree(mask);CHKERRQ(ierr); 1832 1833 B->assembled = PETSC_TRUE; 1834 1835 ierr = MatView_Private(B);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 EXTERN_C_END 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatCreateSeqBAIJ" 1842 /*@C 1843 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1844 compressed row) format. For good matrix assembly performance the 1845 user should preallocate the matrix storage by setting the parameter nz 1846 (or the array nnz). By setting these parameters accurately, performance 1847 during matrix assembly can be increased by more than a factor of 50. 1848 1849 Collective on MPI_Comm 1850 1851 Input Parameters: 1852 + comm - MPI communicator, set to PETSC_COMM_SELF 1853 . bs - size of block 1854 . m - number of rows 1855 . n - number of columns 1856 . nz - number of nonzero blocks per block row (same for all rows) 1857 - nnz - array containing the number of nonzero blocks in the various block rows 1858 (possibly different for each block row) or PETSC_NULL 1859 1860 Output Parameter: 1861 . A - the matrix 1862 1863 Options Database Keys: 1864 . -mat_no_unroll - uses code that does not unroll the loops in the 1865 block calculations (much slower) 1866 . -mat_block_size - size of the blocks to use 1867 1868 Level: intermediate 1869 1870 Notes: 1871 A nonzero block is any block that as 1 or more nonzeros in it 1872 1873 The block AIJ format is fully compatible with standard Fortran 77 1874 storage. That is, the stored row and column indices can begin at 1875 either one (as in Fortran) or zero. See the users' manual for details. 1876 1877 Specify the preallocated storage with either nz or nnz (not both). 1878 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1879 allocation. For additional details, see the users manual chapter on 1880 matrices. 1881 1882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1883 @*/ 1884 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1885 { 1886 int ierr; 1887 1888 PetscFunctionBegin; 1889 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1890 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1891 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1897 /*@C 1898 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1899 per row in the matrix. For good matrix assembly performance the 1900 user should preallocate the matrix storage by setting the parameter nz 1901 (or the array nnz). By setting these parameters accurately, performance 1902 during matrix assembly can be increased by more than a factor of 50. 1903 1904 Collective on MPI_Comm 1905 1906 Input Parameters: 1907 + A - the matrix 1908 . bs - size of block 1909 . nz - number of block nonzeros per block row (same for all rows) 1910 - nnz - array containing the number of block nonzeros in the various block rows 1911 (possibly different for each block row) or PETSC_NULL 1912 1913 Options Database Keys: 1914 . -mat_no_unroll - uses code that does not unroll the loops in the 1915 block calculations (much slower) 1916 . -mat_block_size - size of the blocks to use 1917 1918 Level: intermediate 1919 1920 Notes: 1921 The block AIJ format is fully compatible with standard Fortran 77 1922 storage. That is, the stored row and column indices can begin at 1923 either one (as in Fortran) or zero. See the users' manual for details. 1924 1925 Specify the preallocated storage with either nz or nnz (not both). 1926 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1927 allocation. For additional details, see the users manual chapter on 1928 matrices. 1929 1930 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1931 @*/ 1932 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1933 { 1934 Mat_SeqBAIJ *b; 1935 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1936 PetscTruth flg; 1937 1938 PetscFunctionBegin; 1939 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1940 if (!flg) PetscFunctionReturn(0); 1941 1942 B->preallocated = PETSC_TRUE; 1943 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1944 if (nnz && newbs != bs) { 1945 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1946 } 1947 bs = newbs; 1948 1949 mbs = B->m/bs; 1950 nbs = B->n/bs; 1951 bs2 = bs*bs; 1952 1953 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1954 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1955 } 1956 1957 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1958 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1959 if (nnz) { 1960 for (i=0; i<mbs; i++) { 1961 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1962 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1963 } 1964 } 1965 1966 b = (Mat_SeqBAIJ*)B->data; 1967 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1968 if (!flg) { 1969 switch (bs) { 1970 case 1: 1971 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1972 B->ops->solve = MatSolve_SeqBAIJ_1; 1973 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1974 B->ops->mult = MatMult_SeqBAIJ_1; 1975 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1976 break; 1977 case 2: 1978 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1979 B->ops->solve = MatSolve_SeqBAIJ_2; 1980 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1981 B->ops->mult = MatMult_SeqBAIJ_2; 1982 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1983 break; 1984 case 3: 1985 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1986 B->ops->solve = MatSolve_SeqBAIJ_3; 1987 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1988 B->ops->mult = MatMult_SeqBAIJ_3; 1989 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1990 break; 1991 case 4: 1992 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1993 B->ops->solve = MatSolve_SeqBAIJ_4; 1994 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1995 B->ops->mult = MatMult_SeqBAIJ_4; 1996 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1997 break; 1998 case 5: 1999 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2000 B->ops->solve = MatSolve_SeqBAIJ_5; 2001 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 2002 B->ops->mult = MatMult_SeqBAIJ_5; 2003 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2004 break; 2005 case 6: 2006 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2007 B->ops->solve = MatSolve_SeqBAIJ_6; 2008 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 2009 B->ops->mult = MatMult_SeqBAIJ_6; 2010 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2011 break; 2012 case 7: 2013 B->ops->mult = MatMult_SeqBAIJ_7; 2014 B->ops->solve = MatSolve_SeqBAIJ_7; 2015 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 2016 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2017 break; 2018 default: 2019 B->ops->mult = MatMult_SeqBAIJ_N; 2020 B->ops->solve = MatSolve_SeqBAIJ_N; 2021 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2022 break; 2023 } 2024 } 2025 b->bs = bs; 2026 b->mbs = mbs; 2027 b->nbs = nbs; 2028 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2029 if (!nnz) { 2030 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2031 else if (nz <= 0) nz = 1; 2032 for (i=0; i<mbs; i++) b->imax[i] = nz; 2033 nz = nz*mbs; 2034 } else { 2035 nz = 0; 2036 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2037 } 2038 2039 /* allocate the matrix space */ 2040 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2041 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2042 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2043 b->j = (int*)(b->a + nz*bs2); 2044 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2045 b->i = b->j + nz; 2046 b->singlemalloc = PETSC_TRUE; 2047 2048 b->i[0] = 0; 2049 for (i=1; i<mbs+1; i++) { 2050 b->i[i] = b->i[i-1] + b->imax[i-1]; 2051 } 2052 2053 /* b->ilen will count nonzeros in each block row so far. */ 2054 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2055 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2056 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2057 2058 b->bs = bs; 2059 b->bs2 = bs2; 2060 b->mbs = mbs; 2061 b->nz = 0; 2062 b->maxnz = nz*bs2; 2063 B->info.nz_unneeded = (PetscReal)b->maxnz; 2064 PetscFunctionReturn(0); 2065 } 2066 2067