This guide is your quick-start manual for QuadB64. It shows you how to encode and decode different types of data, from simple text to AI embeddings, and how to get the best speed out of the library. Think of it as learning the secret handshake to a club that keeps your data safe and speedy.
Basic Usage Guide
Getting Started with uubed
This guide covers the essential patterns and common operations you’ll use with the uubed library. After reading this, you’ll understand how to effectively use QuadB64 encoding in your applications.
Core Concepts
Import Patterns
# Basic imports
from uubed import encode_eq64, decode_eq64
from uubed import encode_shq64, encode_t8q64, encode_zoq64
# Advanced imports
from uubed import encode, decode, Config
from uubed import has_native_extensions, benchmark
The Unified API
The encode() and decode() functions provide a unified interface:
from uubed import encode, decode
# Specify encoding method
data = b"Hello, world!"
encoded = encode(data, method="eq64")
decoded = decode(encoded)
# Or use variant-specific functions
encoded = encode_eq64(data) # Same result
decoded = decode_eq64(encoded) # Same result
Working with Different Data Types
Text Data
from uubed import encode_eq64, decode_eq64
# String to bytes conversion
text = "QuadB64 prevents substring pollution!"
data = text.encode('utf-8')
# Encode and decode
encoded = encode_eq64(data)
decoded = decode_eq64(encoded)
recovered_text = decoded.decode('utf-8')
assert text == recovered_text
print(f"Original: {text}")
print(f"Encoded: {encoded}")
print(f"Recovered: {recovered_text}")
Binary Files
from uubed import encode_eq64, decode_eq64

# Read binary file
with open("image.jpg", "rb") as f:
    image_data = f.read()

# Encode for storage in text-based systems
encoded = encode_eq64(image_data)

# Store in database, JSON, etc.
document = {
    "id": "img_001",
    "filename": "image.jpg",
    "size": len(image_data),
    "data": encoded  # Safe for text storage
}

# Later: retrieve and decode
decoded_data = decode_eq64(document["data"])
assert decoded_data == image_data
NumPy Arrays
import numpy as np
from uubed import encode_eq64, decode_eq64
# Create array
arr = np.random.rand(100, 50).astype(np.float32)
# Encode array
encoded = encode_eq64(arr.tobytes())
# Decode and reconstruct
decoded_bytes = decode_eq64(encoded)
reconstructed = np.frombuffer(decoded_bytes, dtype=np.float32)
reconstructed = reconstructed.reshape(100, 50)
assert np.array_equal(arr, reconstructed)
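One caveat: np.frombuffer returns a flat array, so the dtype and shape must be tracked out of band. A minimal sketch of one way to carry that metadata along (pack_array and unpack_array are illustrative helpers, not part of uubed):

import numpy as np
from uubed import encode_eq64, decode_eq64

def pack_array(arr: np.ndarray) -> dict:
    # Keep dtype and shape next to the encoded payload so the
    # exact array can be rebuilt later.
    return {
        "dtype": str(arr.dtype),
        "shape": list(arr.shape),
        "data": encode_eq64(arr.tobytes()),
    }

def unpack_array(record: dict) -> np.ndarray:
    flat = np.frombuffer(decode_eq64(record["data"]), dtype=record["dtype"])
    return flat.reshape(record["shape"])

arr = np.random.rand(100, 50).astype(np.float32)
assert np.array_equal(unpack_array(pack_array(arr)), arr)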
ML Embeddings
# Typical ML workflow
from sentence_transformers import SentenceTransformer
from uubed import encode_shq64, encode_eq64
model = SentenceTransformer('all-MiniLM-L6-v2')
# Generate embeddings
texts = [
    "Machine learning is fascinating",
    "Deep learning uses neural networks",
    "I love pizza"
]
embeddings = model.encode(texts)
# Full precision encoding (reversible)
full_codes = [encode_eq64(emb.tobytes()) for emb in embeddings]
# Compact similarity hashes (irreversible but fast comparison)
hash_codes = [encode_shq64(emb.tobytes()) for emb in embeddings]
print("Full codes (first 30 chars):")
for i, code in enumerate(full_codes):
    print(f" {i}: {code[:30]}...")

print("\nCompact hashes:")
for i, code in enumerate(hash_codes):
    print(f" {i}: {code}")
Configuration and Performance
Check Native Extensions
from uubed import has_native_extensions, get_implementation_info
# Check if native extensions are available
if has_native_extensions():
    print("🚀 Native acceleration enabled!")
else:
    print("⚠️ Using pure Python implementation")
    print("Install native extensions: pip install uubed[native]")
# Get detailed implementation info
info = get_implementation_info()
print(f"Implementation: {info['implementation']}")
print(f"Version: {info['version']}")
print(f"Features: {info['features']}")
Performance Configuration
from uubed import Config, encode_eq64
# Create configuration
config = Config(
    use_native=True,     # Use native implementation if available
    chunk_size=8192,     # Process in 8KB chunks
    num_threads=4,       # Parallel processing threads
    validate_input=True  # Validate input data
)
# Use configuration
large_data = b"x" * 1000000 # 1MB of data
encoded = encode_eq64(large_data, config=config)
Benchmarking
from uubed import benchmark
# Run performance benchmark
results = benchmark()
print("Performance Results:")
print(f"Eq64 encoding: {results['eq64_encode_mb_per_sec']:.1f} MB/s")
print(f"Shq64 hashing: {results['shq64_encode_mb_per_sec']:.1f} MB/s")
print(f"Native available: {results['native_available']}")
Error Handling
Validation
from uubed import validate_eq64, decode_eq64, ValidationError

encoded = "SGVs.bG8s.IFFV.YWRC.NjQh"

# Basic validation
if validate_eq64(encoded):
    decoded = decode_eq64(encoded)
else:
    print("Invalid encoding")

# Detailed validation
try:
    validation = validate_eq64(encoded, detailed=True)
    if not validation['valid']:
        print(f"Validation failed: {validation['errors']}")
except ValidationError as e:
    print(f"Validation error: {e}")
Exception Handling
from uubed import encode_eq64, decode_eq64, DecodingError, EncodingError

try:
    # This will fail - invalid encoding
    decoded = decode_eq64("invalid.encoding.here")
except DecodingError as e:
    print(f"Decoding failed: {e}")

try:
    # This might fail - very large input
    huge_data = b"x" * (1024 * 1024 * 1024)  # 1GB
    encoded = encode_eq64(huge_data)
except EncodingError as e:
    print(f"Encoding failed: {e}")
Batch Processing
Encoding Multiple Items
from uubed import encode_batch, encode_eq64
from concurrent.futures import ProcessPoolExecutor

# Prepare data (model comes from the ML embeddings example above)
documents = ["doc1", "doc2", "doc3"] * 1000
embeddings = [model.encode(doc) for doc in documents]

# Method 1: Built-in batch encoding
encoded_batch = encode_batch(
    [emb.tobytes() for emb in embeddings],
    method="eq64",
    num_workers=4
)

# Method 2: Manual parallel processing
def encode_chunk(chunk):
    return [encode_eq64(emb.tobytes()) for emb in chunk]

chunk_size = 100
chunks = [embeddings[i:i+chunk_size]
          for i in range(0, len(embeddings), chunk_size)]

with ProcessPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(encode_chunk, chunks))

all_encoded = [item for chunk in results for item in chunk]
Streaming Processing
from uubed import StreamEncoder

# For very large files or continuous data
encoder = StreamEncoder("eq64")

def process_large_file(input_path, output_path):
    with open(input_path, "rb") as input_file:
        with open(output_path, "w") as output_file:
            while True:
                chunk = input_file.read(4096)  # 4KB chunks
                if not chunk:
                    break
                encoded_chunk = encoder.encode_chunk(chunk)
                output_file.write(encoded_chunk + "\n")

            # Write any remaining data
            final_chunk = encoder.finalize()
            if final_chunk:
                output_file.write(final_chunk + "\n")
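Running the helper is then a single call (the file paths here are illustrative):

process_large_file("large_corpus.bin", "large_corpus.eq64.txt")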
Common Patterns
Data Pipeline Integration
# ETL pipeline with QuadB64
from uubed import encode, encode_eq64, encode_shq64

class DataPipeline:
    def __init__(self, variant="eq64"):
        self.variant = variant

    def extract(self, source):
        """Extract raw bytes from the source (your extraction logic here)."""
        return source.read()

    def process_data(self, data):
        """Apply any preprocessing before encoding (placeholder passthrough)."""
        return data

    def transform(self, data):
        """Transform and encode data"""
        processed = self.process_data(data)
        if self.variant == "eq64":
            return encode_eq64(processed)
        elif self.variant == "shq64":
            return encode_shq64(processed)
        else:
            return encode(processed, method=self.variant)

    def load(self, encoded_data, destination):
        """Load encoded data to destination"""
        # Store in database, search engine, etc.
        destination.store(encoded_data)

# Usage
pipeline = DataPipeline("shq64")
result = pipeline.transform(input_data)  # input_data: bytes from your extract step
Database Integration
import sqlite3
import numpy as np
from uubed import encode_eq64, decode_eq64

# Setup database
conn = sqlite3.connect('embeddings.db')
cursor = conn.cursor()
cursor.execute('''
    CREATE TABLE IF NOT EXISTS vectors (
        id INTEGER PRIMARY KEY,
        content TEXT,
        embedding_eq64 TEXT NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
''')

# Insert with encoding
def store_embedding(content: str, embedding: np.ndarray):
    encoded = encode_eq64(embedding.tobytes())
    cursor.execute(
        "INSERT INTO vectors (content, embedding_eq64) VALUES (?, ?)",
        (content, encoded)
    )
    conn.commit()

# Retrieve with decoding
def get_embedding(vector_id: int) -> np.ndarray:
    cursor.execute(
        "SELECT embedding_eq64 FROM vectors WHERE id = ?",
        (vector_id,)
    )
    encoded = cursor.fetchone()[0]
    decoded_bytes = decode_eq64(encoded)
    return np.frombuffer(decoded_bytes, dtype=np.float32)
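Putting the two helpers together (the 384-dimensional random vector stands in for a real embedding):

vec = np.random.rand(384).astype(np.float32)
store_embedding("an example sentence", vec)
row_id = cursor.lastrowid

assert np.array_equal(get_embedding(row_id), vec)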
Web API Integration
import base64

from flask import Flask, request, jsonify
from uubed import encode_eq64, decode_eq64

app = Flask(__name__)

@app.route('/encode', methods=['POST'])
def encode_endpoint():
    try:
        # Get binary data from request
        data = request.get_data()

        # Encode
        encoded = encode_eq64(data)

        return jsonify({
            'encoded': encoded,
            'original_size': len(data),
            'encoded_size': len(encoded)
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 400

@app.route('/decode', methods=['POST'])
def decode_endpoint():
    try:
        # Get encoded string from request
        data = request.json
        encoded = data['encoded']

        # Decode
        decoded = decode_eq64(encoded)

        # Return as base64 for JSON compatibility
        return jsonify({
            'decoded': base64.b64encode(decoded).decode(),
            'size': len(decoded)
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 400
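To exercise the endpoints, a client can POST raw bytes and round-trip the result (this assumes the app is running locally on Flask's default port 5000):

import base64
import requests

# Encode raw bytes, then decode them back through the API.
resp = requests.post("http://localhost:5000/encode", data=b"Hello, world!")
encoded = resp.json()["encoded"]

resp = requests.post("http://localhost:5000/decode", json={"encoded": encoded})
assert base64.b64decode(resp.json()["decoded"]) == b"Hello, world!"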
Best Practices Summary
Do’s ✅
- Choose the right variant (see the sketch after this list):
  - Eq64 for lossless encoding
  - Shq64 for similarity comparison
  - T8q64 for sparse data
  - Zoq64 for spatial data
- Use native extensions: Install with pip install uubed[native]
- Validate untrusted input: Use the validate_*() functions
- Handle errors gracefully: Wrap calls in try/except blocks
- Batch when possible: Better performance for multiple items
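As a sketch of that variant-selection advice, here is a tiny dispatcher over the unified encode() API (the purpose-to-variant mapping is illustrative; tune it for your workload):

from uubed import encode

def encode_for(purpose: str, data: bytes) -> str:
    # Map a use case to the variant suggested above.
    method = {
        "storage": "eq64",      # lossless and reversible
        "similarity": "shq64",  # compact locality-preserving hash
        "sparse": "t8q64",      # highlights top-k features
        "spatial": "zoq64",     # Z-order curve for spatial data
    }[purpose]
    return encode(data, method=method)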
Don’ts ❌
- Don’t modify encoded strings: They become invalid
- Don’t mix variants: Each has specific use cases
- Don’t ignore performance: Check for native extensions
- Don’t store without validation: Validate critical encoded data
- Don’t assume reversibility: Only Eq64 is reversible
Next Steps
- Explore Advanced Features
- Learn about Integration Patterns
- Read Performance Tuning
- Check out Real-world Examples