biolockj#

Property Description
biolockj.version string
Property giving the biolockj version that was used to generate the config file.
default: null

cluster#

Property Description
cluster.batchCommand string
Terminal command used to submit jobs on the cluster
default: null
cluster.host string
The remote cluster host URL (used for ssh, scp, rsync, etc)
default: null
cluster.jobHeader string
Header written at top of worker scripts
default: null
cluster.modules list
List of cluster modules to load at start of worker scripts
default: null
cluster.prologue string
To run at the start of every script after loading cluster modules (if any)
default: null
cluster.returnsBatchIds boolean
Options: Y/N. Does the cluster.batchCommand return a job id? If Y, and a job is submitted but no id is returned, that will be treated as a failure and the pipeline will stop.
default: Y
cluster.statusCommand string
Terminal command used to check the status of jobs on the cluster
default: null

demultiplexer#

Property Description
demultiplexer.barcodeCutoff numeric
Options: (0.0 - 1.0); if defined, pipeline will fail if the percentage of reads with a barcode is less than this cutoff.
default: 0.05
demultiplexer.barcodeRevComp boolean
Options: Y/N. Use reverse complement of metadata.barcodeColumn if demultiplexer.strategy = barcode_in_header or barcode_in_seq.
default: null
demultiplexer.strategy string
Options: barcode_in_header, barcode_in_seq, id_in_header, do_not_demux. If using barcodes, they must be provided in the metadata file within the column defined by metadata.barcodeColumn.
default: null

docker#

Property Description
docker.imageName string
The name of a docker image to override whatever a module says to use. Only use the module-specific-override form of this property.
default: null
docker.imageOwner string
Name of the Docker Hub user that owns the docker containers. Only use the module-specific-override form of this property.
default: null
docker.imageTag string
Image tag, a specific version of Docker images. Only use the module-specific-override form of this property.
default: null
docker.mountSock boolean
should /var/run/docker.sock be mounted for modules.
default: N
docker.saveContainerOnExit boolean
If Y, docker run command will NOT include the --rm flag
default: null
docker.verifyImage boolean
In check dependencies, run a test to verify the docker image.
default: null

exe#

Property Description
exe.Rscript executable
Path for the "Rscript" executable; if not supplied, any script that needs the Rscript command will assume it is on the PATH.
default: null
exe.awk executable
Path for the "awk" executable; if not supplied, any script that needs the awk command will assume it is on the PATH.
default: null
exe.docker executable
Path for the "docker" executable; if not supplied, any script that needs the docker command will assume it is on the PATH.
default: null
exe.gzip executable
Path for the "gzip" executable; if not supplied, any script that needs the gzip command will assume it is on the PATH.
default: null
exe.java executable
Path for the "java" executable; if not supplied, any script that needs the java command will assume it is on the PATH.
default: null
exe.python executable
Path for the "python" executable; if not supplied, any script that needs the python command will assume it is on the PATH.
default: null

humann2#

Property Description
humann2.disableGeneFamilies boolean
disable HumanN2 Gene Family report
default: null
humann2.disablePathAbundance boolean
disable HumanN2 Pathway Abundance report
default: null
humann2.disablePathCoverage boolean
disable HumanN2 Pathway Coverage report
default: null

input#

Property Description
input.allowDuplicateNames boolean
Should files with the same name be permitted in inputs? File names are used to link data to metadata, and duplicated names create ambiguity. However, in some pipelines, duplicates are appropriate.
default: N
input.dirPaths list of file paths
List of one or more directories containing the pipeline input data.
default: null
input.ignoreFiles list
file names to ignore if found in input directories
default: null
input.requireCompletePairs boolean
Require all sequence input files have matching paired reads
default: Y
input.suffixFw regex
file suffix used to identify forward reads in input.dirPaths
default: _R1
input.suffixRv regex
file suffix used to identify reverse reads in input.dirPaths
default: _R2
input.trimPrefix string
Prefix to trim from sequence file names or headers to obtain Sample ID; this string can appear anywhere in the filename and all text before it will be removed.
default: null
input.trimSuffix string
Suffix to trim from sequence file names or headers to obtain Sample ID; this string can appear anywhere in the filename and all text after it will be removed.
default: null

metadata#

Property Description
metadata.barcodeColumn string
metadata column with identifying barcodes
default: BarcodeSequence
metadata.columnDelim string
defines how metadata columns are separated; Typically files are tab or comma separated.
default: \t
metadata.commentChar string
metadata file comment indicator; Empty string is a valid option indicating no comments in metadata file.
default: null
metadata.fileNameColumn list
name of the metadata column(s) with input file names
default: null
metadata.filePath file path
If absolute file path, use file as metadata.
If directory path, must find exactly 1 file within, to use as metadata.
default: null
metadata.nullValue string
metadata cells with this value will be treated as empty
default: NA
metadata.required boolean
If Y, require metadata row for each sample with sequence data in input dirs; If N, samples without metadata are ignored.
default: N
metadata.useEveryRow boolean
If Y, require a sequence file for every SampleID (every row) in metadata file; If N, metadata can include extraneous SampleIDs.
default: null

pipeline#

Property Description
pipeline.copyInput boolean
copy input files into pipeline root directory
default: null
pipeline.defaultDemultiplexer string
Java class name for default module used to demultiplex data
default: biolockj.module.implicit.Demultiplexer
pipeline.defaultFastaConverter string
Java class name for default module used to convert files into fasta format
default: biolockj.module.seq.AwkFastaConverter
pipeline.defaultProps list of file paths
file path of default property file(s); Nested default properties are supported (so the default property file can also have a default, and so on).
default: null
pipeline.defaultSeqMerger string
Java class name for default module used to combine paired read files
default: biolockj.module.seq.PearMergeReads
pipeline.defaultStatsModule string
Java class name for default module used to generate p-values and other stats
default: biolockj.module.report.r.R_CalculateStats
pipeline.deleteTempFiles boolean
delete files in temp directories
default: null
pipeline.detachJavaModules boolean
If true, Java modules do not run with the main BioLockJ Java application. Instead, they run on compute nodes in the CLUSTER or AWS environments.
default: Y
pipeline.disableAddImplicitModules boolean
If set to true, implicit modules will not be added to the pipeline.
default: null
pipeline.disableAddPreReqModules boolean
If set to true, prerequisite modules will not be added to the pipeline.
default: null
pipeline.downloadDir file path
local directory used as the destination in the download command
default: $HOME/projects/downloads
pipeline.env string
Environment in which a pipeline is run. Options: cluster, aws, local
default: local
pipeline.envVars list
list of variables that should be passed into the runtime environment for all modules.
default: BLJ
pipeline.inputTypes list
List of file types. This manually overrides the recommended auto-detection.
default: null
pipeline.limitDebugClasses list
limit classes that log debug statements
default: null
pipeline.logLevel string
Options: DEBUG, INFO, WARN, ERROR
default: INFO
pipeline.permissions string
Set chmod -R command security bits on pipeline root directory (Ex. 770)
default: 770
pipeline.setSeed integer
set the seed for a random process. Must be positive integer.
default: null
pipeline.useEnvVars boolean
When evaluating variables in the ${VAR} format, should environment variables be used? Regardless, priority is given to variable values defined in the config file.
default: Y
pipeline.userProfile file path
Bash profile - may be ~/.bash_profile or ~/.bashrc or others
default: null

qiime#

Property Description
qiime.alphaMetrics list
alpha diversity metrics to calculate through qiime; For complete list of skbio.diversity.alpha options, see http://scikit-bio.org/docs/latest/generated/skbio.diversity.alpha.html
default: shannon
qiime.plotAlphaMetrics boolean

default: Y

r#

Property Description
r.colorBase string
base color used for labels & headings in the PDF report; Must be a valid color in R.
default: black
r.colorFile file path
path to a tab-delimited file giving the color to use for each value of each metadata field plotted.
default: null
r.colorHighlight string
color is used to highlight significant OTUs in plot
default: red
r.colorPalette string
palette argument passed to get_palette {ggpubr} to select colors for some output visualizations
default: null
r.colorPoint string
default color of scatterplot and strip-chart plot points
default: black
r.debug boolean
Options: Y/N. If Y, will generate R Script log files
default: Y
r.excludeFields list
Fields from the metadata that will be excluded from any auto-determined typing, or plotting; R reports must contain at least one valid nominal or numeric metadata field.
default: null
r.nominalFields list
Override default property type by explicitly listing it as nominal.
default: null
r.numericFields list
Override default property type by explicitly listing it as numeric.
default: null
r.pch integer
Sets R plot pch parameter for PDF report
default: 21
r.pvalCutoff numeric
p-value cutoff used to assign label r.colorHighlight
default: 0.05
r.rareOtuThreshold numeric
If >=1, R will filter OTUs found in fewer than this many samples. If <1, R will interpret the value as a percentage and discard OTUs not found in at least that percentage of samples.
default: 1
r.reportFields list
Metadata fields to include in reports; Fields listed here must exist in the metadata file. R reports must contain at least one valid field.
default: null
r.saveRData boolean
If Y, all R script generating BioModules will save R Session data to the module output directory to a file using the extension ".RData"
default: null
r.timeout integer
defines the number of minutes before R script fails due to timeout. If set to 0, an estimate is used.
default: 0
r.useUniqueColors boolean
force to use a unique color for every value in every field plotted; only recommended for low numbers of metadata columns/values.
default: null

r_PlotMds#

Property Description
r_PlotMds.reportFields list
Metadata column names indicating fields to include in the MDS report; Fields listed here must exist in the metadata file.
default: null

report#

Property Description
report.logBase string
Options: 10,e,null. If e, use natural log (base e); if 10, use log base 10; if not set, counts will not be converted to a log scale.
default: 10
report.minCount integer
minimum table count allowed; if a count less than this value is found, it is set to 0.
default: 2
report.numHits boolean
Options: Y/N. If Y, add Num_Hits to metadata
default: Y
report.numReads boolean
Options: Y/N. If Y, add Num_Reads to metadata
default: Y
report.scarceCountCutoff numeric
Minimum percentage of samples that must contain a count value for it to be kept.
default: 0.25
report.scarceSampleCutoff numeric
Minimum percentage of data columns that must be non-zero to keep the sample.
default: 0.25
report.taxonomyLevels list
Options: domain,phylum,class,order,family,genus,species. Generate reports for listed taxonomy levels
default: phylum,class,order,family,genus
report.unclassifiedTaxa boolean
report unclassified taxa
default: Y

script#

Property Description
script.defaultHeader string
Store default script header for MAIN script and locally run WORKER scripts.
default: #!/bin/bash
script.fileRefreshDelay integer
delay this many seconds after scripts complete to allow the file system to reflect changes from a worker node/container/virtual machine.
default: 1
script.numThreads integer
Used to reserve cluster resources and passed to any external application call that accepts a numThreads parameter.
default: 8
script.numWorkers integer
Set number of samples to process per script (if parallel processing)
default: 1
script.permissions string
Used as chmod permission parameter (ex: 774)
default: 770
script.timeout integer
Sets # of minutes before worker scripts time out.
default: null

validation#

Property Description
validation.compareOn list
Which columns in the expectation file should be used for the comparison. Options: name, size, md5. Default: use all columns in the expectation file.
default: null
validation.disableValidation boolean
Turn off validation. No validation file output is produced. Options: Y/N. default: N
default: null
validation.expectationFile file path
file path that gives the expected values for file metrics (probably generated by a previous run of the same pipeline)
default: null
validation.reportOn list
Which attributes of the file should be included in the validation report file. Options: name, size, md5
default: null
validation.sizeWithinPercent numeric
What percentage difference is permitted between an output file and its expectation. Options: any positive number
default: null
validation.stopPipeline boolean
If enabled, the validation utility will stop the pipeline if any module fails validation. Options: Y/N
default: N

aws#

Property Description
aws.copyDbToS3 boolean
If true, save all input files to S3
default: null
aws.copyPipelineToS3 boolean
If enabled save pipeline to S3
default: null
aws.copyReportsToS3 boolean
If enabled save reports to S3
default: null
aws.ec2AcquisitionStrategy string
The AWS acquisition strategy (SPOT or DEMAND) sets the service SLA for procuring new EC2 instances
default: null
aws.ec2InstanceID string
ID of an existing ec2 instance to use as the head node
default: null
aws.ec2InstanceType string
AWS instance type determines initial resource class (t2.micro is common)
default: null
aws.ec2SpotPer __

default: null
aws.ec2TerminateHead boolean

default: null
aws.profile file path

default: null
aws.purgeEfsInputs boolean
If enabled delete all EFS dirs (except pipelines)
default: null
aws.purgeEfsOutput boolean
If enabled delete all EFS/pipelines
default: null
aws.ram string
AWS memory set in Nextflow main.nf
default: null
aws.region string

default: null
aws.s3 string
AWS S3 pipeline output directory used by Nextflow main.nf
default: null
aws.s3TransferTimeout integer
Set the max number of minutes to allow for S3 transfers to complete.
default: null
aws.saveCloud boolean

default: null
aws.stack string
An existing aws cloud stack ID
default: null
aws.walltime __

default: null