From 716caa21841d31227bfac983bf654e0c987c7302 Mon Sep 17 00:00:00 2001
From: Chris Ringrose <119854158+cringroseccdc@users.noreply.github.com>
Date: Tue, 3 Feb 2026 18:16:44 +0000
Subject: [PATCH] Adding two scripts from ccdc confidential for conformer
 analysis. Original scripts located here:
 https://github.com/ccdc-confidential/cpp-apps-main/tree/main/wrapping/contributed

The scripts have been heavily edited for usability, clarity and maintainability, but the science is the same.
---
 scripts/conformer_filter_density/ReadMe.md    |   5 +
 .../conformer_filter_density.py               | 189 ++++++++++++++++++
 scripts/filter_poses/ReadMe.md                |   5 +
 scripts/filter_poses/filter_poses.py          | 189 ++++++++++++++++++
 4 files changed, 388 insertions(+)
 create mode 100644 scripts/conformer_filter_density/ReadMe.md
 create mode 100644 scripts/conformer_filter_density/conformer_filter_density.py
 create mode 100644 scripts/filter_poses/ReadMe.md
 create mode 100644 scripts/filter_poses/filter_poses.py
diff --git a/scripts/conformer_filter_density/ReadMe.md b/scripts/conformer_filter_density/ReadMe.md
new file mode 100644
index 0000000..020d3ee
--- /dev/null
+++ b/scripts/conformer_filter_density/ReadMe.md
@@ -0,0 +1,5 @@
+# Conformer Filter Density
+
+Filter conformers using a variety of metrics. See arguments in script for details.
+
+CCDC Python API Licence required, minimum version: 3.0.15
diff --git a/scripts/conformer_filter_density/conformer_filter_density.py b/scripts/conformer_filter_density/conformer_filter_density.py
new file mode 100644
index 0000000..8b684b8
--- /dev/null
+++ b/scripts/conformer_filter_density/conformer_filter_density.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+#
+# This script can be used for any purpose without limitation subject to the
+# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
+#
+# This permission notice and the following statement of attribution must be
+# included in all copies or substantial portions of this script.
+#
+# 2026-02-03: created by the Cambridge Crystallographic Data Centre
+
+import argparse
+import csv
+
+from ccdc import conformer, io
+
+
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    parser.add_argument('inmolfn',
+                        metavar='<input molecule file>',
+                        help='Input file (single- or multi-molecule file)')
+
+    parser.add_argument('-m', '--mode',
+                        choices=['absolute', 'relative'],
+                        default='absolute',
+                        help='Limit mode: absolute (fixed threshold) or relative '
+                             '(threshold based on molecule with fewest unusual torsions). '
+                             'WARNING: Relative mode may behave unexpectedly with conformers '
+                             'from multiple input molecules (default: %(default)s)')
+
+    parser.add_argument('-l', '--limit',
+                        dest='torsion_limit',
+                        type=int,
+                        default=0,
+                        metavar='<limit>',
+                        help='Maximum number of unusual torsions for a passing molecule '
+                             '(default: %(default)s)')
+
+    parser.add_argument('-d', '--local-density',
+                        dest='local_density_threshold',
+                        type=float,
+                        default=10.0,
+                        metavar='<threshold>',
+                        help='Local density threshold for classifying a torsion as unusual '
+                             '(default: %(default)s)')
+
+    parser.add_argument('--incl-organometallics',
+                        dest='incl_organometallics',
+                        action='store_true',
+                        help='Include organometallic compounds in the search '
+                             '(default: organic compounds only)')
+
+    parser.add_argument('--generalisation',
+                        action='store_true',
+                        help='Turn on generalisation for searches')
+
+    parser.add_argument('--successfn',
+                        default='successes.mol',
+                        metavar='<file>',
+                        help='Output file for molecules that pass the filter '
+                             '(default: %(default)s)')
+
+    parser.add_argument('--failurefn',
+                        default='failures.mol',
+                        metavar='<file>',
+                        help='Output file for molecules that fail the filter '
+                             '(default: %(default)s)')
+
+    parser.add_argument('-u', '--unusual-torsions',
+                        dest='unusualtorsionfn',
+                        default='unusual_torsions.csv',
+                        metavar='<file>',
+                        help='Output CSV file for unusual torsion details '
+                             '(default: %(default)s)')
+
+    return parser.parse_args()
+
+
+def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation):
+    """Create and configure a geometry analyser engine.
+
+    Args:
+        local_density_threshold: Threshold for classifying torsions as unusual
+        incl_organometallics: Whether to include organometallic compounds
+        generalisation: Whether to enable generalisation for searches
+
+    Returns:
+        Configured ccdc.conformer.GeometryAnalyser instance
+    """
+    engine = conformer.GeometryAnalyser()
+
+    engine.settings.bond.analyse = False
+    engine.settings.angle.analyse = False
+    engine.settings.ring.analyse = False
+
+    engine.settings.torsion.local_density_threshold = local_density_threshold
+    engine.settings.generalisation = generalisation
+    engine.settings.organometallic_filter = 'all' if incl_organometallics else 'organics_only'
+
+    return engine
+
+
+def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file):
+    """Analyze molecules for unusual torsions and filter based on criteria.
+
+    Args:
+        torsion_limit: Maximum number of unusual torsions allowed
+        input_filename: Path to input molecule file
+        mode: 'absolute' or 'relative' filtering mode
+        engine: Configured GeometryAnalyser instance
+        success_file: Path to output file for passing molecules
+        failure_file: Path to output file for failing molecules
+        unusual_torsion_file: Path to CSV file for unusual torsion details
+    """
+    # Analyze all molecules and collect unusual torsion data
+    molecules = []
+    min_unusual_torsions = float('inf')
+
+    with io.MoleculeReader(input_filename) as mol_reader:
+        for molecule in mol_reader:
+            molecule.standardise_aromatic_bonds()
+            molecule.standardise_delocalised_bonds()
+
+            geometry_analysed_molecule = engine.analyse_molecule(molecule)
+
+            molecule.unusual_torsions = [
+                t for t in geometry_analysed_molecule.analysed_torsions
+                if t.unusual and t.enough_hits
+            ]
+            molecule.num_unusual_torsions = len(molecule.unusual_torsions)
+            molecules.append(molecule)
+
+            min_unusual_torsions = min(min_unusual_torsions, molecule.num_unusual_torsions)
+
+    # Write results
+    with io.MoleculeWriter(success_file) as passed_writer, \
+         io.MoleculeWriter(failure_file) as failed_writer, \
+         open(unusual_torsion_file, 'w', newline='') as csv_file:
+
+        csv_writer = csv.writer(csv_file)
+        csv_writer.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms'])
+
+        for idx, molecule in enumerate(molecules):
+            threshold = torsion_limit if mode == 'absolute' else min_unusual_torsions + torsion_limit
+            failed = molecule.num_unusual_torsions > threshold
+
+            if failed:
+                failed_writer.write(molecule)
+                for torsion in molecule.unusual_torsions:
+                    csv_writer.writerow([
+                        idx,
+                        torsion.value,
+                        torsion.z_score,
+                        torsion.local_density,
+                        torsion.nhits,
+                        ' '.join(torsion.atom_labels)
+                    ])
+            else:
+                passed_writer.write(molecule)
+
+
+def run():
+    """Main entry point for the script."""
+    args = parse_args()
+
+    if args.torsion_limit < 0:
+        raise ValueError('Torsion limit must be >= 0')
+
+    engine = create_mogul_engine(
+        args.local_density_threshold,
+        args.incl_organometallics,
+        args.generalisation
+    )
+
+    analysis(
+        args.torsion_limit,
+        args.inmolfn,
+        args.mode,
+        engine,
+        args.successfn,
+        args.failurefn,
+        args.unusualtorsionsfn,
+    )
+
+
+if __name__ == '__main__':
+    run()
diff --git a/scripts/filter_poses/ReadMe.md b/scripts/filter_poses/ReadMe.md
new file mode 100644
index 0000000..e909ab5
--- /dev/null
+++ b/scripts/filter_poses/ReadMe.md
@@ -0,0 +1,5 @@
+# Filter Poses
+
+This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities.
+
+CCDC Python API Licence required, minimum version: 3.0.15
diff --git a/scripts/filter_poses/filter_poses.py b/scripts/filter_poses/filter_poses.py
new file mode 100644
index 0000000..3827e16
--- /dev/null
+++ b/scripts/filter_poses/filter_poses.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+#
+# This script can be used for any purpose without limitation subject to the
+# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
+#
+# This permission notice and the following statement of attribution must be
+# included in all copies or substantial portions of this script.
+#
+# 2025-02-03: created by the Cambridge Crystallographic Data Centre
+
+import argparse
+import copy
+import csv
+import math
+
+from ccdc import conformer, io
+
+
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    parser.add_argument('conformer_file',
+                        metavar='<input file>',
+                        help='Input file (multi-molecule file)')
+
+    parser.add_argument('-csv', '--write-csv',
+                        dest='write_csv',
+                        action='store_true',
+                        help='Write a csv file for all the analysed conformers.')
+
+    return parser.parse_args()
+
+
+class ProbabilityScorer:
+    """
+    Use the ConformerGenerator and GeometryAnalyser to score conformers based on their conformational
+    probabilities and unusual torsions.
+    """
+    def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True,
+                 user_mogul_analysis_settings=None):
+
+        self._generator = self._create_conformer_generator(user_conformer_generator_settings, skip_minimisation)
+        self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings)
+
+    def _create_analyser(self, user_mogul_analyser_settings):
+        """Create a GeometryAnalyser engine to analyse the conformers."""
+        engine = conformer.GeometryAnalyser()
+
+        # Tweak the 'default defaults'
+        # By default, in this use case, we do not want to use generalisation.
+        engine.settings.generalisation = False
+        # By default, only the organic subset, i.e. exclude organometallic
+        engine.settings.organometallic_filter = 'Organic'
+
+        # Ensure user settings are kept:
+        if user_mogul_analyser_settings is not None:
+            engine.settings = copy.copy(user_mogul_analyser_settings)
+
+        # Only analyse torsions:
+        engine.settings.bond.analyse = False
+        engine.settings.angle.analyse = False
+        engine.settings.ring.analyse = False
+
+        return engine
+
+    def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True):
+        """Create a ConformerGenerator engine to generate conformers for the molecules."""
+        settings = conformer.ConformerSettings()
+        if user_generator_settings is not None:
+            settings = copy.copy(user_generator_settings)
+
+        # Mandatory setting here: this must work in use_input_torsion_distributions mode.
+        settings.use_input_torsion_distributions = True
+
+        engine = conformer.ConformerGenerator(settings, skip_minimisation)
+        return engine
+
+    @staticmethod
+    def _combined_local_density(all_local_densities):
+        if all_local_densities:
+            return sum(all_local_densities) / len(all_local_densities)
+        return 100.0
+
+    def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0,
+                             bad_rmsd_score=9999.9):
+
+        # Approximation excluding rings:
+        is_rigid = sum(bond.is_rotatable for bond in molecule.bonds) == 0
+
+        conformers = self._generator.generate(molecule)
+
+        normalised_score = None
+        rmsd = None
+        ln_probability = None
+
+        if conformers:
+            # First conformer is the most likely, so return its score.
+            normalised_score = conformers[0].normalised_score
+            rmsd = conformers[0].rmsd()
+            ln_probability = conformers[0].probability
+
+            if is_rigid:
+                if ln_probability is None:
+                    ln_probability = 0.0
+                if normalised_score is None:
+                    normalised_score = 0.0
+
+        if normalised_score is None:
+            normalised_score = bad_normalised_score
+
+        if rmsd is None:
+            rmsd = bad_rmsd_score
+
+        probability = bad_probability_score
+        if ln_probability is not None:
+            probability = math.exp(ln_probability)
+
+        return normalised_score, probability, rmsd
+
+    def unusual_torsions_analysis(self, molecule, max_hist_size=15):
+        checked_mol = self._mogul_analysis_engine.analyse_molecule(molecule)
+        unusual_count = 0
+        local_densities = []
+        unusual_local_densities = []
+        no_data_torsions = 0
+        for tor in checked_mol.analysed_torsions:
+            hist_size = sum(tor.histogram())
+            if hist_size > max_hist_size:
+                local_densities.append(tor.local_density)
+
+                if tor.unusual:
+                    unusual_local_densities.append(tor.local_density)
+                    unusual_count += 1
+            else:
+                no_data_torsions += 1
+
+                # Expected number of torsions within 10 degrees assuming even distribution of observations.
+                # Number of Mogul bins would be strictly better than assuming 18.
+                uniform_torsion_dist = 100 / 18
+                local_densities.append(uniform_torsion_dist)
+        return unusual_count, self._combined_local_density(local_densities), self._combined_local_density(
+            unusual_local_densities), no_data_torsions
+
+    def process_molecule(self, molecule):
+        molecule.remove_unknown_atoms()
+        molecule.assign_bond_types()
+
+        unusual_count, avg_local_density, avg_unusual_local_density, no_data_torsions = self.unusual_torsions_analysis(
+            molecule)
+        normalised_score, probability, rmsd = self.probability_analysis(molecule)
+
+        return {
+            "identifier": molecule.identifier,
+            "number of unusual torsions": unusual_count,
+            "average local density": avg_local_density,
+            "average unusual local density": avg_unusual_local_density,
+            "number of torsions with no data": no_data_torsions,
+            "normalised probability score": normalised_score,
+            "probability score": probability,
+            "RMSD to input conformation": rmsd,
+        }
+
+
+def run():
+    args = parse_args()
+
+    p = ProbabilityScorer()
+
+    all_data = []
+
+    with io.MoleculeReader(args.conformer_file) as reader:
+        for i, molecule in enumerate(reader):
+            data = p.process_molecule(molecule)
+            if i == 0:
+                print(", ".join(data.keys()))
+            print(", ".join(str(value) for value in data.values()))
+            all_data.append(data)
+
+    if args.write_csv:
+        keys = all_data[0].keys()
+        with open('filtered_poses_analysis.csv', 'w', newline='') as output_file:
+            dict_writer = csv.DictWriter(output_file, keys)
+            dict_writer.writeheader()
+            dict_writer.writerows(all_data)
+
+
+if __name__ == "__main__":
+    run()