Sync all skills and memories 2026-04-14 07:27
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
# OBLITERATUS Abliteration Config
|
||||
# Usage: obliteratus run this-file.yaml
|
||||
#
|
||||
# This is for reproducible, version-controlled abliteration runs.
|
||||
# For one-off usage, the CLI flags are simpler.
|
||||
|
||||
# Model to abliterate
|
||||
model:
|
||||
name: "meta-llama/Llama-3.1-8B-Instruct"
|
||||
dtype: "bfloat16" # float16, bfloat16, float32
|
||||
quantization: null # null, "4bit", "8bit"
|
||||
device: "auto" # auto, cuda, cuda:0, cpu
|
||||
|
||||
# Abliteration method and parameters
|
||||
abliteration:
|
||||
method: "informed" # See SKILL.md Step 4 for all 13 methods
|
||||
n_directions: null # null = auto-detect, or integer (e.g., 8)
|
||||
regularization: 0.0 # 0.0-1.0, fraction of original to preserve
|
||||
refinement_passes: 1 # Iterative passes (increase for self-repair)
|
||||
norm_preserve: true # Keep weight norms intact after projection
|
||||
|
||||
# Output
|
||||
output:
|
||||
directory: "./abliterated-models"
|
||||
save_metadata: true # Save abliteration_metadata.json alongside model
|
||||
contribute: false # Save community contribution data
|
||||
|
||||
# Verification
|
||||
verify:
|
||||
enabled: true
|
||||
test_prompts: null # null = use built-in test prompts
|
||||
compute_perplexity: true
|
||||
compute_kl: true
|
||||
@@ -0,0 +1,40 @@
|
||||
# OBLITERATUS Analysis Study Config
|
||||
# Usage: obliteratus run this-file.yaml --preset jailbreak
|
||||
#
|
||||
# Run analysis modules to understand refusal geometry BEFORE abliterating.
|
||||
# Useful for research or when you want to understand what you're removing.
|
||||
|
||||
# Model to analyze
|
||||
model:
|
||||
name: "meta-llama/Llama-3.1-8B-Instruct"
|
||||
dtype: "bfloat16"
|
||||
quantization: "4bit" # Saves VRAM for analysis
|
||||
device: "auto"
|
||||
|
||||
# Study configuration
|
||||
study:
|
||||
# Available presets: quick, full, attention, jailbreak, guardrail, knowledge
|
||||
preset: "jailbreak"
|
||||
|
||||
# Or specify individual strategies:
|
||||
# strategies:
|
||||
# - layer_removal
|
||||
# - head_pruning
|
||||
# - ffn_ablation
|
||||
# - embedding_ablation
|
||||
|
||||
# Analysis modules to run (subset of the 27 available)
|
||||
analysis:
|
||||
- alignment_imprint # Detect DPO/RLHF/CAI/SFT training method
|
||||
- concept_geometry # Map refusal cone geometry
|
||||
- logit_lens # Find which layer decides to refuse
|
||||
- anti_ouroboros # Detect self-repair tendency
|
||||
- cross_layer # Cross-layer alignment clustering
|
||||
- causal_tracing # Causal necessity of components
|
||||
- residual_stream # Attention vs MLP contribution
|
||||
|
||||
# Output
|
||||
output:
|
||||
directory: "./analysis-results"
|
||||
save_plots: true # Generate matplotlib visualizations
|
||||
save_report: true # Generate markdown report
|
||||
@@ -0,0 +1,41 @@
|
||||
# OBLITERATUS Batch Abliteration Config
|
||||
# Abliterate multiple models with the same method for comparison.
|
||||
#
|
||||
# Run each one sequentially:
|
||||
# for model in models; do obliteratus obliterate $model --method informed; done
|
||||
#
|
||||
# Or use this as a reference for which models to process.
|
||||
|
||||
# Common settings
|
||||
defaults:
|
||||
method: "informed"
|
||||
quantization: "4bit"
|
||||
output_dir: "./abliterated-models"
|
||||
|
||||
# Models to process (grouped by compute tier)
|
||||
models:
|
||||
# Small (4-8 GB VRAM)
|
||||
small:
|
||||
- "Qwen/Qwen2.5-1.5B-Instruct"
|
||||
- "microsoft/Phi-3.5-mini-instruct"
|
||||
- "meta-llama/Llama-3.2-3B-Instruct"
|
||||
|
||||
# Medium (8-16 GB VRAM)
|
||||
medium:
|
||||
- "meta-llama/Llama-3.1-8B-Instruct"
|
||||
- "mistralai/Mistral-7B-Instruct-v0.3"
|
||||
- "google/gemma-2-9b-it"
|
||||
- "Qwen/Qwen2.5-7B-Instruct"
|
||||
|
||||
# Large (24 GB VRAM, 4-bit quantization)
|
||||
large:
|
||||
- "Qwen/Qwen2.5-14B-Instruct"
|
||||
- "Qwen/Qwen3-32B"
|
||||
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
||||
|
||||
# Per-model method overrides (optional)
|
||||
overrides:
|
||||
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B":
|
||||
method: "surgical" # CoT-aware for reasoning models
|
||||
"mistralai/Mixtral-8x7B-Instruct-v0.1":
|
||||
method: "nuclear" # Expert-granular for MoE models
|
||||
Reference in New Issue
Block a user