From 716caa21841d31227bfac983bf654e0c987c7302 Mon Sep 17 00:00:00 2001
From: Chris Ringrose <119854158+cringroseccdc@users.noreply.github.com>
Date: Tue, 3 Feb 2026 18:16:44 +0000
Subject: [PATCH] Adding two scripts from ccdc confidential for conformer
analysis. Original scripts located here:
https://github.com/ccdc-confidential/cpp-apps-main/tree/main/wrapping/contributed
The scripts have been heavily edited for usability, clarity and maintainability, but the science is the same.
---
scripts/conformer_filter_density/ReadMe.md | 5 +
.../conformer_filter_density.py | 189 ++++++++++++++++++
scripts/filter_poses/ReadMe.md | 5 +
scripts/filter_poses/filter_poses.py | 189 ++++++++++++++++++
4 files changed, 388 insertions(+)
create mode 100644 scripts/conformer_filter_density/ReadMe.md
create mode 100644 scripts/conformer_filter_density/conformer_filter_density.py
create mode 100644 scripts/filter_poses/ReadMe.md
create mode 100644 scripts/filter_poses/filter_poses.py
diff --git a/scripts/conformer_filter_density/ReadMe.md b/scripts/conformer_filter_density/ReadMe.md
new file mode 100644
index 0000000..020d3ee
--- /dev/null
+++ b/scripts/conformer_filter_density/ReadMe.md
@@ -0,0 +1,5 @@
+# Conformer Filter Density
+
+Filter conformers using a variety of metrics. See arguments in script for details.
+
+CCDC Python API Licence required, minimum version: 3.0.15
diff --git a/scripts/conformer_filter_density/conformer_filter_density.py b/scripts/conformer_filter_density/conformer_filter_density.py
new file mode 100644
index 0000000..8b684b8
--- /dev/null
+++ b/scripts/conformer_filter_density/conformer_filter_density.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+#
+# This script can be used for any purpose without limitation subject to the
+# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
+#
+# This permission notice and the following statement of attribution must be
+# included in all copies or substantial portions of this script.
+#
+# 2026-02-03: created by the Cambridge Crystallographic Data Centre
+
+import argparse
+import csv
+
+from ccdc import conformer, io
+
+
+def parse_args():
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(description=__doc__)
+
+ parser.add_argument('inmolfn',
+ metavar='',
+ help='Input file (single- or multi-molecule file)')
+
+ parser.add_argument('-m', '--mode',
+ choices=['absolute', 'relative'],
+ default='absolute',
+ help='Limit mode: absolute (fixed threshold) or relative '
+ '(threshold based on molecule with fewest unusual torsions). '
+ 'WARNING: Relative mode may behave unexpectedly with conformers '
+ 'from multiple input molecules (default: %(default)s)')
+
+ parser.add_argument('-l', '--limit',
+ dest='torsion_limit',
+ type=int,
+ default=0,
+ metavar='',
+ help='Maximum number of unusual torsions for a passing molecule '
+ '(default: %(default)s)')
+
+ parser.add_argument('-d', '--local-density',
+ dest='local_density_threshold',
+ type=float,
+ default=10.0,
+ metavar='',
+ help='Local density threshold for classifying a torsion as unusual '
+ '(default: %(default)s)')
+
+ parser.add_argument('--incl-organometallics',
+ dest='incl_organometallics',
+ action='store_true',
+ help='Include organometallic compounds in the search '
+ '(default: organic compounds only)')
+
+ parser.add_argument('--generalisation',
+ action='store_true',
+ help='Turn on generalisation for searches')
+
+ parser.add_argument('--successfn',
+ default='successes.mol',
+ metavar='',
+ help='Output file for molecules that pass the filter '
+ '(default: %(default)s)')
+
+ parser.add_argument('--failurefn',
+ default='failures.mol',
+ metavar='',
+ help='Output file for molecules that fail the filter '
+ '(default: %(default)s)')
+
+ parser.add_argument('-u', '--unusual-torsions',
+ dest='unusualtorsionfn',
+ default='unusual_torsions.csv',
+ metavar='',
+ help='Output CSV file for unusual torsion details '
+ '(default: %(default)s)')
+
+ return parser.parse_args()
+
+
+def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation):
+ """Create and configure a geometry analyser engine.
+
+ Args:
+ local_density_threshold: Threshold for classifying torsions as unusual
+ incl_organometallics: Whether to include organometallic compounds
+ generalisation: Whether to enable generalisation for searches
+
+ Returns:
+ Configured ccdc.conformer.GeometryAnalyser instance
+ """
+ engine = conformer.GeometryAnalyser()
+
+ engine.settings.bond.analyse = False
+ engine.settings.angle.analyse = False
+ engine.settings.ring.analyse = False
+
+ engine.settings.torsion.local_density_threshold = local_density_threshold
+ engine.settings.generalisation = generalisation
+ engine.settings.organometallic_filter = 'all' if incl_organometallics else 'organics_only'
+
+ return engine
+
+
+def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file):
+ """Analyze molecules for unusual torsions and filter based on criteria.
+
+ Args:
+ torsion_limit: Maximum number of unusual torsions allowed
+ input_filename: Path to input molecule file
+ mode: 'absolute' or 'relative' filtering mode
+ engine: Configured GeometryAnalyser instance
+ success_file: Path to output file for passing molecules
+ failure_file: Path to output file for failing molecules
+ unusual_torsion_file: Path to CSV file for unusual torsion details
+ """
+ # Analyze all molecules and collect unusual torsion data
+ molecules = []
+ min_unusual_torsions = float('inf')
+
+ with io.MoleculeReader(input_filename) as mol_reader:
+ for molecule in mol_reader:
+ molecule.standardise_aromatic_bonds()
+ molecule.standardise_delocalised_bonds()
+
+ geometry_analysed_molecule = engine.analyse_molecule(molecule)
+
+ molecule.unusual_torsions = [
+ t for t in geometry_analysed_molecule.analysed_torsions
+ if t.unusual and t.enough_hits
+ ]
+ molecule.num_unusual_torsions = len(molecule.unusual_torsions)
+ molecules.append(molecule)
+
+ min_unusual_torsions = min(min_unusual_torsions, molecule.num_unusual_torsions)
+
+ # Write results
+ with io.MoleculeWriter(success_file) as passed_writer, \
+ io.MoleculeWriter(failure_file) as failed_writer, \
+ open(unusual_torsion_file, 'w', newline='') as csv_file:
+
+ csv_writer = csv.writer(csv_file)
+ csv_writer.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms'])
+
+ for idx, molecule in enumerate(molecules):
+ threshold = torsion_limit if mode == 'absolute' else min_unusual_torsions + torsion_limit
+ failed = molecule.num_unusual_torsions > threshold
+
+ if failed:
+ failed_writer.write(molecule)
+ for torsion in molecule.unusual_torsions:
+ csv_writer.writerow([
+ idx,
+ torsion.value,
+ torsion.z_score,
+ torsion.local_density,
+ torsion.nhits,
+ ' '.join(torsion.atom_labels)
+ ])
+ else:
+ passed_writer.write(molecule)
+
+
+def run():
+ """Main entry point for the script."""
+ args = parse_args()
+
+ if args.torsion_limit < 0:
+ raise ValueError('Torsion limit must be >= 0')
+
+ engine = create_mogul_engine(
+ args.local_density_threshold,
+ args.incl_organometallics,
+ args.generalisation
+ )
+
+ analysis(
+ args.torsion_limit,
+ args.inmolfn,
+ args.mode,
+ engine,
+ args.successfn,
+ args.failurefn,
+ args.unusualtorsionsfn,
+ )
+
+
+if __name__ == '__main__':
+ run()
diff --git a/scripts/filter_poses/ReadMe.md b/scripts/filter_poses/ReadMe.md
new file mode 100644
index 0000000..e909ab5
--- /dev/null
+++ b/scripts/filter_poses/ReadMe.md
@@ -0,0 +1,5 @@
+# Filter Poses
+
+This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities.
+
+CCDC Python API Licence required, minimum version: 3.0.15
diff --git a/scripts/filter_poses/filter_poses.py b/scripts/filter_poses/filter_poses.py
new file mode 100644
index 0000000..3827e16
--- /dev/null
+++ b/scripts/filter_poses/filter_poses.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+#
+# This script can be used for any purpose without limitation subject to the
+# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
+#
+# This permission notice and the following statement of attribution must be
+# included in all copies or substantial portions of this script.
+#
+# 2025-02-03: created by the Cambridge Crystallographic Data Centre
+
+import argparse
+import copy
+import csv
+import math
+
+from ccdc import conformer, io
+
+
+def parse_args():
+ """Parse command line arguments."""
+ parser = argparse.ArgumentParser(description=__doc__)
+
+ parser.add_argument('conformer_file',
+ metavar='',
+ help='Input file (multi-molecule file)')
+
+ parser.add_argument('-csv', '--write-csv',
+ dest='write_csv',
+ action='store_true',
+ help='Write a csv file for all the analysed conformers.')
+
+ return parser.parse_args()
+
+
+class ProbabilityScorer:
+ """
+ Use the ConformerGenerator and GeometryAnalyser to score conformers based on their conformational
+ probabilities and unusual torsions.
+ """
+ def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True,
+ user_mogul_analysis_settings=None):
+
+ self._generator = self._create_conformer_generator(user_conformer_generator_settings, skip_minimisation)
+ self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings)
+
+ def _create_analyser(self, user_mogul_analyser_settings):
+ """Create a GeometryAnalyser engine to analyse the conformers."""
+ engine = conformer.GeometryAnalyser()
+
+ # Tweak the 'default defaults'
+ # By default, in this use case, we do not want to use generalisation.
+ engine.settings.generalisation = False
+ # By default, only the organic subset, i.e. exclude organometallic
+ engine.settings.organometallic_filter = 'Organic'
+
+ # Ensure user settings are kept:
+ if user_mogul_analyser_settings is not None:
+ engine.settings = copy.copy(user_mogul_analyser_settings)
+
+ # Only analyse torsions:
+ engine.settings.bond.analyse = False
+ engine.settings.angle.analyse = False
+ engine.settings.ring.analyse = False
+
+ return engine
+
+ def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True):
+ """Create a ConformerGenerator engine to generate conformers for the molecules."""
+ settings = conformer.ConformerSettings()
+ if user_generator_settings is not None:
+ settings = copy.copy(user_generator_settings)
+
+ # Mandatory setting here: this must work in use_input_torsion_distributions mode.
+ settings.use_input_torsion_distributions = True
+
+ engine = conformer.ConformerGenerator(settings, skip_minimisation)
+ return engine
+
+ @staticmethod
+ def _combined_local_density(all_local_densities):
+ if all_local_densities:
+ return sum(all_local_densities) / len(all_local_densities)
+ return 100.0
+
+ def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0,
+ bad_rmsd_score=9999.9):
+
+ # Approximation excluding rings:
+ is_rigid = sum(bond.is_rotatable for bond in molecule.bonds) == 0
+
+ conformers = self._generator.generate(molecule)
+
+ normalised_score = None
+ rmsd = None
+ ln_probability = None
+
+ if conformers:
+ # First conformer is the most likely, so return its score.
+ normalised_score = conformers[0].normalised_score
+ rmsd = conformers[0].rmsd()
+ ln_probability = conformers[0].probability
+
+ if is_rigid:
+ if ln_probability is None:
+ ln_probability = 0.0
+ if normalised_score is None:
+ normalised_score = 0.0
+
+ if normalised_score is None:
+ normalised_score = bad_normalised_score
+
+ if rmsd is None:
+ rmsd = bad_rmsd_score
+
+ probability = bad_probability_score
+ if ln_probability is not None:
+ probability = math.exp(ln_probability)
+
+ return normalised_score, probability, rmsd
+
+ def unusual_torsions_analysis(self, molecule, max_hist_size=15):
+ checked_mol = self._mogul_analysis_engine.analyse_molecule(molecule)
+ unusual_count = 0
+ local_densities = []
+ unusual_local_densities = []
+ no_data_torsions = 0
+ for tor in checked_mol.analysed_torsions:
+ hist_size = sum(tor.histogram())
+ if hist_size > max_hist_size:
+ local_densities.append(tor.local_density)
+
+ if tor.unusual:
+ unusual_local_densities.append(tor.local_density)
+ unusual_count += 1
+ else:
+ no_data_torsions += 1
+
+ # Expected number of torsions within 10 degrees assuming even distribution of observations.
+ # Number of Mogul bins would be strictly better than assuming 18.
+ uniform_torsion_dist = 100 / 18
+ local_densities.append(uniform_torsion_dist)
+ return unusual_count, self._combined_local_density(local_densities), self._combined_local_density(
+ unusual_local_densities), no_data_torsions
+
+ def process_molecule(self, molecule):
+ molecule.remove_unknown_atoms()
+ molecule.assign_bond_types()
+
+ unusual_count, avg_local_density, avg_unusual_local_density, no_data_torsions = self.unusual_torsions_analysis(
+ molecule)
+ normalised_score, probability, rmsd = self.probability_analysis(molecule)
+
+ return {
+ "identifier": molecule.identifier,
+ "number of unusual torsions": unusual_count,
+ "average local density": avg_local_density,
+ "average unusual local density": avg_unusual_local_density,
+ "number of torsions with no data": no_data_torsions,
+ "normalised probability score": normalised_score,
+ "probability score": probability,
+ "RMSD to input conformation": rmsd,
+ }
+
+
+def run():
+ args = parse_args()
+
+ p = ProbabilityScorer()
+
+ all_data = []
+
+ with io.MoleculeReader(args.conformer_file) as reader:
+ for i, molecule in enumerate(reader):
+ data = p.process_molecule(molecule)
+ if i == 0:
+ print(", ".join(data.keys()))
+ print(", ".join(str(value) for value in data.values()))
+ all_data.append(data)
+
+ if args.write_csv:
+ keys = all_data[0].keys()
+ with open('filtered_poses_analysis.csv', 'w', newline='') as output_file:
+ dict_writer = csv.DictWriter(output_file, keys)
+ dict_writer.writeheader()
+ dict_writer.writerows(all_data)
+
+
+if __name__ == "__main__":
+ run()