sparseutils

utilities for interacting with sparse files
Log | Files | Refs | README | LICENSE

commit f705100cc8146d2e3dbe2cbf9f461fa8337f8ec0
Author: Richard Ipsum <richardipsum@fastmail.co.uk>
Date:   Sun,  5 Feb 2017 22:56:16 +0000

Initial commit

Diffstat:
A check | 13 +++++++++++++
A mksparse | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A sparsemap | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A yarns/implementations.yarn | 9 +++++++++
A yarns/shell_lib | 5 +++++
A yarns/sparsemap.yarn | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 247 insertions(+), 0 deletions(-)

#!/bin/bash

set -e
set -u

# Run the yarn scenarios against the mksparse/sparsemap tools that live in
# the same directory as this script.
#
# The directory is derived from the script's own location rather than from
# the literal word "check" (whose dirname is always "."), so the suite can
# be started from any working directory.
run_yarn_suite() {
    local here
    here="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"

    # Prepend the tool directory to PATH so the scenarios pick up the
    # mksparse and sparsemap under test.
    PATH="$here:$PATH" yarn "$here/yarns/sparsemap.yarn" \
        "$here/yarns/implementations.yarn" --stop-on-first-fail \
        -s "$here/yarns/shell_lib" \
        --tempdir="$(mktemp -d /tmp/gpr_yarn_XXX)" \
        --snapshot --shell='/bin/bash'
}

run_yarn_suite
import os
import sys

import cliapp


DEFAULT_DATA_SIZE = 1024
DEFAULT_HOLE_SIZE = 1024 ** 2

# Help text handed to cliapp.  It describes what process_args() and
# parse_spec() actually do: exactly one positional argument, with the
# specification read from standard input.
description = '''Create a sparse file.

The only command line argument is the name of the output file. The
specification for how the file is to be made sparse is read from
standard input: a sequence of "data" and "hole" words, which may be
interspersed with spaces, commas, or the word "a", all of which are
ignored, except that the "data" and "hole" words must have something
in between them.

'''


class CreateSparseFile(cliapp.Application):
    """Command line tool that writes a file made of data runs and holes.

    Each "data" word in the specification appends ``data-size`` bytes of
    'x'; each "hole" word extends the file by ``hole-size`` bytes without
    writing anything.
    """

    def add_settings(self):
        """Register the --hole-size and --data-size integer settings."""
        self.settings.integer(['hole-size'], 'hole size',
                              metavar='SIZE', default=DEFAULT_HOLE_SIZE)
        self.settings.integer(['data-size'], 'data size',
                              metavar='SIZE', default=DEFAULT_DATA_SIZE)

    def process_args(self, args):
        """Create the output file named by the single positional argument.

        Raises cliapp.AppException unless exactly one argument is given.
        """
        if len(args) != 1:
            raise cliapp.AppException('mksparse OUTPUT_FILE')

        output_filename = args[0]
        spec = self.parse_spec()

        # parse_spec() only ever returns these two words, so a lookup
        # table replaces the former if/assert pair (an assert would be
        # stripped when running with -O).
        appenders = {
            'hole': self.append_hole,
            'data': self.append_data,
        }

        with open(output_filename, 'w') as f:
            for word in spec:
                appenders[word](f)

    def parse_spec(self):
        """Read the specification from stdin.

        Returns the list of "data" and "hole" words in input order;
        commas count as separators and every other word is dropped.
        """
        text = sys.stdin.read()
        words = text.replace(',', ' ').split()
        return [word for word in words if word in ('data', 'hole')]

    def append_data(self, f):
        """Append ``data-size`` bytes of 'x' to f and flush them out."""
        f.write('x' * self.settings['data-size'])
        f.flush()

    def append_hole(self, f):
        """Extend f by ``hole-size`` bytes without writing any data."""
        # The seek below works on the raw descriptor, so make sure nothing
        # is still sitting in the file object's buffer first.
        f.flush()
        fd = f.fileno()
        pos = os.lseek(fd, self.settings['hole-size'], os.SEEK_CUR)
        # Truncating to the new offset grows the file to cover the gap.
        os.ftruncate(fd, pos)


if __name__ == '__main__':
    CreateSparseFile(description=description).run()
import errno
import os
import sys


def sparsemap(fd):
    """Print the data/hole layout of the file open on descriptor *fd*.

    One line is written to stdout per extent, in file order, as
    ``DATA <length>`` or ``HOLE <length>`` with the length in bytes.
    A file without holes yields a single DATA line; an empty file
    yields no output.

    Raises OSError if seeking fails for any reason other than the
    expected ENXIO that marks the final extent.
    """
    end_of_file_pos = os.lseek(fd, 0, os.SEEK_END)
    if end_of_file_pos == 0:
        # Nothing to map, and SEEK_HOLE at offset 0 of an empty file
        # fails with ENXIO.
        return

    # If the first hole starts at offset 0 the file begins with a hole,
    # so the next boundary to look for is data; otherwise it begins with
    # data and the next boundary is a hole.
    starts_in_hole = os.lseek(fd, 0, os.SEEK_HOLE) == 0
    what = os.SEEK_DATA if starts_in_hole else os.SEEK_HOLE
    pos = 0

    while pos < end_of_file_pos:
        # We are inside the opposite of whatever we are seeking.
        current = 'DATA' if what == os.SEEK_HOLE else 'HOLE'

        try:
            next_pos = os.lseek(fd, pos, what)
        except OSError as e:
            if e.errno != errno.ENXIO:
                # A genuine failure: do not carry on with an unset or
                # stale next_pos.
                raise
            # What we were looking for is not in the rest of the file,
            # so the current extent runs to the end.
            print(current, end_of_file_pos - pos)
            return

        print(current, next_pos - pos)

        pos = next_pos
        what = os.SEEK_DATA if what == os.SEEK_HOLE else os.SEEK_HOLE


def _main(argv):
    """Map the file named by argv[1]; return the process exit status."""
    if len(argv) != 2:
        print('usage: {} FILE'.format(argv[0]), file=sys.stderr)
        return 1

    fd = os.open(argv[1], os.O_RDONLY)
    try:
        sparsemap(fd)
    finally:
        # Close the descriptor even when the scan raises.
        os.close(fd)
    return 0


if __name__ == '__main__':
    sys.exit(_main(sys.argv))
how many zeroes +are required before it decides to represent the hole as an actual +hole rather than just a run of zeroes. + +At time of writing (2017) these tests pass on ext4, +where the default block size is 4096 bytes. + + SCENARIO sparsemap single data followed by single hole + GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec data,hole + WHEN we run sparsemap on S + THEN stdout contains exactly DATA 8192\nHOLE 4096 + + SCENARIO sparsemap single hole followed by single data + GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec hole,data + WHEN we run sparsemap on S + THEN stdout contains exactly HOLE 4096\nDATA 8192 + + SCENARIO sparsemap hole followed by data followed by hole + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole,data,hole + WHEN we run sparsemap on S + THEN stdout contains exactly HOLE 8192\nDATA 4096\nHOLE 8192 + + SCENARIO tiny hole can look like data + GIVEN a sparse file S with a hole size 6 and data size 4096 with spec hole,data,hole + WHEN we run sparsemap on S + THEN stdout contains exactly DATA 4108 + + SCENARIO sparsemap data followed by hole followed by data + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,data + WHEN we run sparsemap on S + THEN stdout contains exactly DATA 4096\nHOLE 8192\nDATA 4096 + + SCENARIO sparsemap sequence of holes + GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec data,hole,hole,data + WHEN we run sparsemap on S + THEN stdout contains exactly DATA 8192\nHOLE 8192\nDATA 8192 + + SCENARIO sparsemap sequence of holes (ending with hole) + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,hole,hole + WHEN we run sparsemap on S + THEN stdout contains exactly DATA 4096\nHOLE 24576 + + SCENARIO sparsemap file with only data + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data + WHEN we run sparsemap on S + THEN stdout contains 
exactly DATA 4096 + + SCENARIO sparsemap file with only hole + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole + WHEN we run sparsemap on S + THEN stdout contains exactly HOLE 8192 + + SCENARIO sparsemap file with data,hole,data,hole,data + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,data,hole,data + WHEN we run sparsemap on S + THEN stdout contains exactly DATA 4096\nHOLE 8192\nDATA 4096\nHOLE 8192\nDATA 4096 + + SCENARIO sparsemap file with hole,data,hole,data,hole + GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole,data,hole,data,hole + WHEN we run sparsemap on S + THEN stdout contains exactly HOLE 8192\nDATA 4096\nHOLE 8192\nDATA 4096\nHOLE 8192