# Example usage with test FASTQ files
from BarcodeSeqKit.core import BarcodeConfig, BarcodeLocationType, BarcodeExtractorConfig
# Barcodes to search for: one BarcodeConfig per barcode, each tagged with the
# BarcodeLocationType it is declared under.
barcodes = [
    # 3' barcode
    BarcodeConfig(
        sequence="TAACTGAGGCCGGC",
        location=BarcodeLocationType.THREE_PRIME,
        name="3prime",
        description="3' barcode from test data",
    ),
    # 5' barcode
    BarcodeConfig(
        sequence="CTGACTCCTTAAGGGCC",
        location=BarcodeLocationType.FIVE_PRIME,
        name="5prime",
        description="5' barcode from test data",
    ),
]
# Extraction configuration. The output directory is created up front;
# exist_ok=True keeps re-runs from failing when it is already there.
output_dir = "../tests/fastq_output"
os.makedirs(output_dir, exist_ok=True)

config = BarcodeExtractorConfig(
    barcodes=barcodes,
    output_prefix="test_extraction",
    output_dir=output_dir,
    max_mismatches=0,
    verbose=True,
)
# Locations of the two test FASTQ files (test.1.fastq.gz and test.2.fastq.gz),
# both resolved relative to fastq_dir.
fastq_dir = "../tests"
test_fastq1, test_fastq2 = (
    os.path.join(fastq_dir, f"test.{mate}.fastq.gz") for mate in (1, 2)
)
# Run the example only when both test FASTQ files are present; otherwise
# report which paths were expected.
if os.path.exists(test_fastq1) and os.path.exists(test_fastq2):
    print(f"Processing FASTQ files: {test_fastq1} , {test_fastq2} ")

    # Process the FASTQ files with the configuration defined earlier.
    stats = process_fastq_files(
        config=config,
        fastq_files=[test_fastq1, test_fastq2],
        compress_output=True,
        search_both_reads=True,
    )

    # Print the overall totals, then the per-barcode, per-orientation and
    # per-category breakdowns exposed by the returned statistics object.
    print(f" \n Total reads: {stats.total_reads} ")
    print(f"Total barcode matches: {stats.total_barcode_matches} ")
    for barcode_name, count in stats.matches_by_barcode.items():
        print(f" {barcode_name} : {count} matches")
    for orientation, count in stats.matches_by_orientation.items():
        print(f" Orientation {orientation} : {count} matches")
    for category, count in stats.matches_by_category.items():
        print(f" Category {category} : {count} matches")

    # List the output files. os.listdir() returns entries in arbitrary order,
    # so the names are sorted to keep the printed listing reproducible.
    output_files = sorted(
        filename
        for filename in os.listdir(output_dir)
        if filename.startswith("test_extraction_")
        and filename.endswith((".fastq.gz", ".fastq"))
    )
    print(" \n Output files:")
    for filename in output_files:
        path = os.path.join(output_dir, filename)
        if filename.endswith(".fastq.gz"):
            try:
                # Count FASTQ reads
                read_count = FastqHandler.count_fastq_reads(path)
                print(f" {filename} ( {read_count} reads)")
            except Exception as e:
                # Best-effort fallback: a failed read count must not abort
                # the listing, so report the file size and the error instead.
                size = os.path.getsize(path)
                print(f" {filename} ( {size} bytes) - Error getting read count: {e!s} ")
        else:
            # Uncompressed .fastq outputs are reported by size only.
            size = os.path.getsize(path)
            print(f" {filename} ( {size} bytes)")
else:
    print(f"Test files not found. Expected: {test_fastq1} and {test_fastq2} ")