Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix and speedup max_dist and nearest_neighbor bonding algorithms #48

Merged
merged 6 commits into from
Jun 27, 2024

Conversation

janosh
Copy link
Owner

@janosh janosh commented Jun 27, 2024

occasional weird outliers in run time where the original implementation was faster but generally decent speed ups

┌─────────┬──────────┬───────────────┬──────────────┬───────────────────────┬──────────────────────┐
│ (index) │ numAtoms │ orig_max_dist │ new_max_dist │ orig_nearest_neighbor │ new_nearest_neighbor │
├─────────┼──────────┼───────────────┼──────────────┼───────────────────────┼──────────────────────┤
│ 0       │ 10       │ 0.18          │ 0.06         │ 0.19                  │ 0.15                 │
│ 1       │ 50       │ 0.68          │ 0.26         │ 1                     │ 0.4                  │
│ 2       │ 100      │ 1.03          │ 0.52         │ 1.14                  │ 1.06                 │
│ 3       │ 500      │ 20.59         │ 8.04         │ 17.46                 │ 10.83                │
│ 4       │ 1000     │ 64.15         │ 29.11        │ 10.15                 │ 41.88                │
│ 5       │ 5000     │ 1436.54       │ 444.82       │ 228.58                │ 170.48               │
└─────────┴──────────┴───────────────┴──────────────┴───────────────────────┴──────────────────────┘
code to generate run time table

run with node src/lib/structure/perf.bond.js

import { performance } from 'perf_hooks'

// Original functions
const orig_max_dist = (
  structure,
  { max_bond_dist = 3, min_bond_dist = 0.4 } = {},
) => {
  const bonds = []
  const bond_set = new Set()

  for (let idx = 0; idx < structure.sites.length; idx++) {
    const { xyz } = structure.sites[idx]

    for (let idx_2 = idx + 1; idx_2 < structure.sites.length; idx_2++) {
      const { xyz: xyz_2 } = structure.sites[idx_2]

      const dist = Math.sqrt(
        xyz.reduce((sum, _, i) => sum + (xyz[i] - xyz_2[i]) ** 2, 0),
      )
      if (dist <= max_bond_dist && dist >= min_bond_dist) {
        const bond_key = JSON.stringify([xyz, xyz_2].sort())
        if (!bond_set.has(bond_key)) {
          bond_set.add(bond_key)
          bonds.push([xyz, xyz_2, idx, idx_2, dist])
        }
      }
    }
  }
  return bonds
}

const orig_nearest_neighbor = (
  structure,
  { scaling_factor = 1.2, min_bond_dist = 0.1 } = {},
) => {
  const num_sites = structure.sites.length
  const bonds = []
  const bond_set = new Set()

  for (let i = 0; i < num_sites; i++) {
    const { xyz: xyz1 } = structure.sites[i]
    let min_dist = Infinity

    for (let j = 0; j < num_sites; j++) {
      if (i !== j) {
        const { xyz: xyz2 } = structure.sites[j]
        const dist = Math.sqrt(
          xyz1.reduce((sum, _, i) => sum + (xyz1[i] - xyz2[i]) ** 2, 0),
        )
        if (dist >= min_bond_dist && dist < min_dist) {
          min_dist = dist
        }
      }
    }

    for (let j = i + 1; j < num_sites; j++) {
      const { xyz: xyz2 } = structure.sites[j]
      const dist = Math.sqrt(
        xyz1.reduce((sum, _, i) => sum + (xyz1[i] - xyz2[i]) ** 2, 0),
      )

      if (dist >= min_bond_dist && dist <= min_dist * scaling_factor) {
        const bond_key = JSON.stringify([xyz1, xyz2].sort())
        if (!bond_set.has(bond_key)) {
          bond_set.add(bond_key)
          bonds.push([xyz1, xyz2, i, j, dist])
        }
      }
    }
  }

  return bonds
}

// Optimized functions
const new_max_dist = (
  structure,
  { max_bond_dist = 3, min_bond_dist = 0.4 } = {},
) => {
  const bonds = []
  const bond_set = new Set()
  const max_bond_dist_sq = max_bond_dist ** 2
  const min_bond_dist_sq = min_bond_dist ** 2

  for (let i = 0; i < structure.sites.length; i++) {
    const { xyz: xyz1 } = structure.sites[i]

    for (let j = i + 1; j < structure.sites.length; j++) {
      const { xyz: xyz2 } = structure.sites[j]

      const dist_sq = xyz1.reduce(
        (sum, _, i) => sum + (xyz1[i] - xyz2[i]) ** 2,
        0,
      )
      if (dist_sq <= max_bond_dist_sq && dist_sq >= min_bond_dist_sq) {
        const dist = Math.sqrt(dist_sq)
        const bond_key = `${i},${j}`
        if (!bond_set.has(bond_key)) {
          bond_set.add(bond_key)
          bonds.push([xyz1, xyz2, i, j, dist])
        }
      }
    }
  }
  return bonds
}

const new_nearest_neighbor = (
  structure,
  { scaling_factor = 1.2, min_bond_dist = 0.1 } = {},
) => {
  const num_sites = structure.sites.length
  const bonds = []
  const bond_set = new Set()
  const min_bond_dist_sq = min_bond_dist ** 2

  const nearest_distances = new Array(num_sites).fill(Infinity)

  for (let i = 0; i < num_sites; i++) {
    const { xyz: xyz1 } = structure.sites[i]

    for (let j = i + 1; j < num_sites; j++) {
      const { xyz: xyz2 } = structure.sites[j]
      const dist_sq = xyz1.reduce(
        (sum, _, i) => sum + (xyz1[i] - xyz2[i]) ** 2,
        0,
      )

      if (dist_sq >= min_bond_dist_sq) {
        if (dist_sq < nearest_distances[i]) nearest_distances[i] = dist_sq
        if (dist_sq < nearest_distances[j]) nearest_distances[j] = dist_sq
      }
    }
  }

  for (let i = 0; i < num_sites; i++) {
    const { xyz: xyz1 } = structure.sites[i]
    const max_dist_sq = nearest_distances[i] * scaling_factor ** 2

    for (let j = i + 1; j < num_sites; j++) {
      const { xyz: xyz2 } = structure.sites[j]
      const dist_sq = xyz1.reduce(
        (sum, _, i) => sum + (xyz1[i] - xyz2[i]) ** 2,
        0,
      )

      if (dist_sq >= min_bond_dist_sq && dist_sq <= max_dist_sq) {
        const dist = Math.sqrt(dist_sq)
        const bond_key = `${i},${j}`
        if (!bond_set.has(bond_key)) {
          bond_set.add(bond_key)
          bonds.push([xyz1, xyz2, i, j, dist])
        }
      }
    }
  }

  return bonds
}

// Benchmark function
const runBenchmark = (numAtoms) => {
  const structure = {
    sites: Array.from({ length: numAtoms }, () => ({
      xyz: [Math.random() * 10, Math.random() * 10, Math.random() * 10],
    })),
  }

  const start1 = performance.now()
  orig_max_dist(structure)
  const end1 = performance.now()

  const start2 = performance.now()
  new_max_dist(structure)
  const end2 = performance.now()

  const start3 = performance.now()
  orig_nearest_neighbor(structure)
  const end3 = performance.now()

  const start4 = performance.now()
  new_nearest_neighbor(structure)
  const end4 = performance.now()

  return {
    numAtoms,
    orig_max_dist: parseFloat((end1 - start1).toFixed(2)),
    new_max_dist: parseFloat((end2 - start2).toFixed(2)),
    orig_nearest_neighbor: parseFloat((end3 - start3).toFixed(2)),
    new_nearest_neighbor: parseFloat((end4 - start4).toFixed(2)),
  }
}

// Run benchmarks for different numbers of atoms
const atomCounts = [10, 50, 100, 500, 1000, 5000]
const results = atomCounts.map(runBenchmark)

console.table(results)

now testing against performance regressions in tests/unit/bonding.test.ts. max allowed run times might need a CI scaling factor since it probably runs faster on my local machine

@janosh janosh added perf Performance issues or improvements bonding Chemical bonding detection and rendering labels Jun 27, 2024
@janosh janosh merged commit 05d091a into main Jun 27, 2024
4 checks passed
@janosh janosh deleted the fix-and-speedup-bonding-algos branch June 27, 2024 21:59
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bonding Chemical bonding detection and rendering perf Performance issues or improvements
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant