Source code for analysis.plot_cSize

#!/usr/bin/env python3
"""
Compute a *cluster-size distribution* (in **molecules**) from **one**
LAMMPS ``*.DATA`` snapshot and display it as a bar chart.

For simplicity the connectivity is extracted directly from the “Bonds”
section; no fancy spatial search is performed.
"""

from __future__ import annotations

from collections import Counter, defaultdict
from pathlib import Path
from typing import Optional, Tuple, List

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from matplotlib.axes import Axes


[docs] def _read_snapshot(file_path: Path) -> Tuple[List[int], List[Tuple[int, int]]]: mol_ids: list[int] = [] bonds: list[Tuple[int, int]] = [] atom_to_mol: dict[int, int] = {} # NEW mode = None with file_path.open() as fh: for line in fh: clean = line.strip() low = clean.lower() # --- section switches ------------------------------------------------- if low.startswith("atoms"): mode = "atoms" continue if low.startswith("bonds"): mode = "bonds" continue if low.startswith(("angles", # anything we don’t need "velocities", "impropers", "dihedrals")): mode = None continue if not clean or clean[0].isalpha(): continue # --- data lines ------------------------------------------------------- if mode == "atoms": # atom-ID, mol-ID, … atom_id, mol_id = int(clean.split()[0]), int(clean.split()[1]) atom_to_mol[atom_id] = mol_id mol_ids.append(mol_id) elif mode == "bonds": # bond-ID, type, atom-i, atom-j _, _, a, b = clean.split()[:4] bonds.append((int(a), int(b))) # convert atom-level bonds → molecule-level bonds mol_bonds = [(atom_to_mol[a], atom_to_mol[b]) for a, b in bonds] return mol_ids, mol_bonds
[docs] def plot_csize(data_file: str | Path, ax: Optional[Axes] = None) -> Axes: """ Plot the *fraction of molecules* in clusters of size *s* for **one** snapshot. Parameters ---------- data_file LAMMPS ``*.DATA`` snapshot (with Atoms/Bonds sections). ax Optional Matplotlib axis. Returns ------- matplotlib.axes.Axes Bar chart of *y(s)*. Notes ----- * Cluster size here means **number of molecules** (``mol`` IDs) in the connected component. * Uses `networkx` for the connected-component search. """ data_file = Path(data_file) mol_ids, bonds = _read_snapshot(data_file) G = nx.Graph() G.add_nodes_from(mol_ids) # build edges *between molecules* (collapse atom IDs -> mol IDs) for a, b in bonds: G.add_edge(a, b) # Connected components in molecule space sizes = [len(cc) for cc in nx.connected_components(G)] total_mols = len(set(mol_ids)) dist = Counter(sizes) xs = np.array(sorted(dist)) ys = np.array([dist[s] * s / total_mols for s in xs]) if ax is None: _, ax = plt.subplots() ax.bar(xs, ys, width=1.4) ax.set_xlabel("Cluster size (molecules)") ax.set_ylabel("Fraction of molecules in clusters of size s") ax.set_title(data_file.name) return ax