2
0
Fork 0
mirror of https://github.com/MartinThoma/LaTeX-examples.git synced 2025-04-26 06:48:04 +02:00
LaTeX-examples/tikz/histogram-large-1d-dataset/process.py
Martin Thoma 47dec60918 fixed bug
2014-08-07 11:55:55 -04:00

120 lines
4.3 KiB
Python
Executable file

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import shutil
import fileinput
import math
def main(filename, bins, maximum, yticks_number):
    """Read one number per line from ``filename`` and bin it for a histogram.

    The range from the smallest number in the file up to ``maximum`` is split
    into ``bins`` half-open bins of equal width. One additional overflow bin,
    labelled with the LaTeX infinity symbol, counts every number that is
    greater than or equal to ``maximum``.

    Args:
        filename: Path of a text file with one number per line; blank lines
            are ignored.
        bins: Number of regular bins (at least 1).
        maximum: Numbers greater than or equal to this value are counted in
            the overflow bin.
        yticks_number: Approximate number of y-axis tick intervals
            (at least 1).

    Returns:
        A tuple ``(bin_counter, xticklabels, ytickslabels, yticks)``:
        ``bin_counter`` holds ``bins + 1`` counts (the last one is the
        overflow bin), ``xticklabels`` holds one label per count,
        ``yticks`` holds the tick positions as strings and ``ytickslabels``
        the matching human-readable labels.

    Raises:
        ValueError: If ``bins`` or ``yticks_number`` is smaller than 1, if a
            line cannot be parsed as a number, or if the file contains no
            numbers at all.
    """
    if bins < 1:
        raise ValueError("bins must be at least 1")
    if yticks_number < 1:
        raise ValueError("yticks_number must be at least 1")

    with open(filename) as f:
        numbers = sorted(float(line) for line in f if line.strip())
    if not numbers:
        raise ValueError("%s does not contain any numbers" % filename)

    minimum = numbers[0]
    width = (maximum - minimum) / bins
    lower_edges = [minimum + width * b for b in range(bins)]

    # The numbers are sorted, so the matching bin index never decreases and
    # a single forward-moving pointer is enough to place every number.
    bin_counter = [0] * (bins + 1)
    current = 0
    for number in numbers:
        if number >= maximum:
            # Everything at or above the maximum goes to the overflow bin.
            bin_counter[bins] += 1
            continue
        while current + 1 < bins and number >= lower_edges[current + 1]:
            current += 1
        bin_counter[current] += 1

    xticklabels = [get_xticklabel(lower) for lower in lower_edges]
    xticklabels.append(r"\infty")

    # Get labels for y-axis: the top tick is the largest count rounded down
    # to its leading digit (e.g. 4711 -> 4000). Working on the decimal string
    # avoids the rounding error of math.log(x, 10) at exact powers of ten.
    maxy = max(bin_counter)
    digits = str(maxy)
    maxylabel = int(digits[0]) * 10 ** (len(digits) - 1)
    # range() needs a positive integer step.
    ylabelsteps = max(1, maxylabel // yticks_number)

    yticks = []
    ytickslabels = []
    for tick in range(0, maxylabel + 1, ylabelsteps):
        yticks.append(str(tick))
        ytickslabels.append(get_yticklabel(tick, True))

    return bin_counter, xticklabels, ytickslabels, yticks
def get_xticklabel(value):
    """Return the x-axis label for ``value``: its integer part as a string."""
    truncated = int(value)
    return "%s" % truncated
def get_yticklabel(value, si_suffix):
    """Format ``value`` as a y-axis tick label.

    Args:
        value: Anything ``float()`` accepts.
        si_suffix: If true, scale the value by the divisor returned by
            ``get_si_suffix`` and append the matching suffix; whole numbers
            are printed without decimals, everything else with two decimals.
            If false, return the plain string form of the float.

    Returns:
        The label as a string.
    """
    number = float(value)
    if not si_suffix:
        return str(number)

    divisor, unit = get_si_suffix(number)
    scaled = number / divisor
    whole = int(scaled)
    if whole == scaled:
        text = "%i" % whole
    else:
        text = "%0.2f" % scaled
    return text + unit
def get_si_suffix(value):
    """Return the divisor and suffix used to abbreviate ``value``.

    Args:
        value: The number to abbreviate.

    Returns:
        A tuple ``(divide_by, suffix)``: ``(10**6, "M")`` for values of at
        least one million, ``(10**3, "K")`` for values of at least one
        thousand, and ``(1, "")`` otherwise.
    """
    # Test the largest threshold first; otherwise every value >= 10**6 would
    # already match the 10**3 branch and never be reported in millions.
    if value >= 10**6:
        return (10**6, "M")
    elif value >= 10**3:
        return (10**3, "K")
    else:
        return (1, "")
def modify_template(bin_counter, xticklabels, yticklabels, yticks):
    """Fill the LaTeX histogram template with the computed data.

    Copies ``histogram-large-1d-dataset.template.tex`` to
    ``histogram-large-1d-dataset.tex`` and then rewrites the copy in place,
    substituting the ``{{xticklabels}}``, ``{{yticklabels}}``, ``{{yticks}}``
    and ``{{coordinates}}`` placeholders.

    Args:
        bin_counter: Count per bin; written as ``(index, count)`` pairs.
        xticklabels: Labels for the x-axis; each is wrapped in ``$...$``.
        yticklabels: Labels for the y-axis (strings).
        yticks: Positions of the y-axis ticks (strings).
    """
    template_path = "histogram-large-1d-dataset.template.tex"
    output_path = "histogram-large-1d-dataset.tex"
    shutil.copyfile(template_path, output_path)

    # Placeholders are applied to every line in exactly this order.
    substitutions = [
        ("{{xticklabels}}",
         ", ".join("$%s$" % label for label in xticklabels)),
        ("{{yticklabels}}", ", ".join(yticklabels)),
        ("{{yticks}}", ",".join(yticks)),
        ("{{coordinates}}",
         "".join("(%i, %i) " % (index, count)
                 for index, count in enumerate(bin_counter))),
    ]

    # With inplace=True, fileinput redirects print() into the file itself.
    for line in fileinput.input(output_path, inplace=True):
        for placeholder, replacement in substitutions:
            line = line.replace(placeholder, replacement)
        print(line, end='')
if __name__ == '__main__':
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

    # Command-line interface; the formatter adds each option's default value
    # to the --help output.
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter,
                            description=__doc__)
    parser.add_argument("-f", "--file",
                        metavar="FILE", dest="filename",
                        default="1ddata.txt",
                        help="use FILE as input data")
    parser.add_argument("-b", "--bins",
                        type=int, dest="bins", default=15,
                        help="how many bins should be used")
    parser.add_argument("-m", "--max",
                        type=float, dest="max", default=15000,
                        help=("what is the maximum number that "
                              "should get binned?"))
    parser.add_argument("--yticks",
                        type=int, dest="yticks", default=5,
                        help="How many y-ticks should be used?")
    args = parser.parse_args()

    # Compute the histogram data, then write it into the LaTeX template.
    histogram = main(args.filename, args.bins, args.max, args.yticks)
    bin_counter, xticklabels, yticklabels, yticks = histogram
    modify_template(bin_counter, xticklabels, yticklabels, yticks)