commit f705100cc8146d2e3dbe2cbf9f461fa8337f8ec0
Author: Richard Ipsum <richardipsum@fastmail.co.uk>
Date: Sun, 5 Feb 2017 22:56:16 +0000
Initial commit
Diffstat:
6 files changed, 247 insertions(+), 0 deletions(-)
diff --git a/check b/check
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+set -u
+
+# Run the yarn scenarios with this script's directory prepended to PATH, so
+# that the step implementations can call mksparse and sparsemap by name.
+run_yarn_suite() {
+    local script_dir
+    # Use "$0" so the directory is the one holding this script, and quote
+    # every expansion so a path containing spaces survives word splitting.
+    # Assigned separately from `local` so a failure still trips `set -e`.
+    script_dir="$(realpath "$(dirname "$0")")"
+
+    PATH="$script_dir:$PATH" yarn yarns/sparsemap.yarn \
+        yarns/implementations.yarn --stop-on-first-fail -s yarns/shell_lib \
+        --tempdir="$(mktemp -d /tmp/gpr_yarn_XXX)" \
+        --snapshot --shell='/bin/bash'
+}
+
+run_yarn_suite
diff --git a/mksparse b/mksparse
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# Copyright 2013-2014 Lars Wirzenius
+#
+# Copyright 2017 Richard Ipsum
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+description = '''Create a sparse file.
+
+The first command line argument is the name of the output file. The
+second argument is a specification for how the file is to be made
+sparse: a sequence of "data" and "hole" words, which may be
+interspersed with spaces, commas, or the word "a", all of which are
+ignored, except that the "data" and "hole" words must have something
+in between them.
+
+'''
+
+
+import cliapp
+import os
+import sys
+
+# Default for the 'data-size' setting: how many 'x' characters append_data
+# writes for each "data" word in the spec.
+DEFAULT_DATA_SIZE = 1024
+# Default for the 'hole-size' setting: how far append_hole moves the file
+# offset forward for each "hole" word in the spec.
+DEFAULT_HOLE_SIZE = 1024 ** 2
+
+class CreateSparseFile(cliapp.Application):
+
+ def add_settings(self):
+ self.settings.integer(['hole-size'], 'hole size',
+ metavar='SIZE', default=DEFAULT_HOLE_SIZE)
+ self.settings.integer(['data-size'], 'data size',
+ metavar='SIZE', default=DEFAULT_DATA_SIZE)
+
+ def process_args(self, args):
+ if len(args) != 1:
+ raise cliapp.AppException('mksparse OUTPUT_FILE')
+
+ output_filename = args[0]
+ spec = self.parse_spec()
+
+ with open(output_filename, 'w') as f:
+ for word in spec:
+ if word == 'hole':
+ self.append_hole(f)
+ else:
+ assert word == 'data'
+ self.append_data(f)
+
+ def parse_spec(self):
+ text = sys.stdin.read()
+ # Remove commas.
+ text = ' '.join(text.split(','))
+
+ # Split into words.
+ words = text.split()
+
+ # Remove any words that are not "data" or "hole".
+ spec = [x for x in words if x in ('data', 'hole')]
+
+ return spec
+
+ def append_data(self, f):
+ f.write('x' * self.settings['data-size'])
+ f.flush()
+
+ def append_hole(self, f):
+ fd = f.fileno()
+ pos = os.lseek(fd, self.settings['hole-size'], os.SEEK_CUR)
+ os.ftruncate(fd, pos)
+
+
+CreateSparseFile(description=description).run()
diff --git a/sparsemap b/sparsemap
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+#
+# sparsemap
+#
+# Copyright © 2017 Richard Ipsum
+#
+# This file is part of obnam.
+#
+# obnam is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# obnam is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with obnam. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import sys
+import os
+import errno
+
+def sparsemap(fd):
+
+ # First of all, where are we currently, data or hole?
+ end_of_file_pos = os.lseek(fd, 0, os.SEEK_END)
+ what = os.SEEK_DATA
+ pos = os.lseek(fd, 0, os.SEEK_HOLE)
+
+ if pos == 0:
+ what = os.SEEK_DATA # we are already in a hole
+ elif pos == end_of_file_pos:
+ # no holes in this file
+ print('DATA', end_of_file_pos)
+ return
+ else:
+ what = os.SEEK_HOLE # we were in data
+ pos = 0
+
+ while pos < end_of_file_pos:
+
+ current = 'DATA' if what == os.SEEK_HOLE else 'HOLE'
+
+ try:
+ next_pos = os.lseek(fd, pos, what)
+ except OSError as e:
+ if e.errno == errno.ENXIO:
+ # whatever we were looking for isn't in the file
+ # that means that either the rest of the file is a hole or data
+ print(current, end_of_file_pos - pos)
+ return
+
+ print(current, next_pos - pos)
+
+ pos = next_pos
+ what = os.SEEK_DATA if what == os.SEEK_HOLE else os.SEEK_HOLE
+
+if len(sys.argv) != 2:
+ print('usage: {} FILE'.format(sys.argv[0]), file=sys.stderr)
+ sys.exit(1)
+
+fd = os.open(sys.argv[1], os.O_RDONLY)
+sparsemap(fd)
+os.close(fd)
diff --git a/yarns/implementations.yarn b/yarns/implementations.yarn
@@ -0,0 +1,9 @@
+ IMPLEMENTS GIVEN a sparse file (\S+) with a hole size (\d+) and data size (\d+) with spec ([a-zA-Z,]+)
+ # The spec goes to mksparse on stdin; the file is created under $DATADIR.
+ set -e -o pipefail
+ echo "$MATCH_4" | mksparse "$DATADIR/$MATCH_1" --hole-size "$MATCH_2" --data-size "$MATCH_3"
+
+ IMPLEMENTS WHEN we run sparsemap on (\w+)
+ # run_sparsemap (yarns/shell_lib) saves sparsemap's stdout in $DATADIR/STDOUT.
+ run_sparsemap "$DATADIR/$MATCH_1"
+
+ IMPLEMENTS THEN stdout contains exactly (.+)
+ # echo -e expands the \n escapes written in the scenario text, so the
+ # expected output is compared line by line with the saved stdout.
+ diff -u <(echo -e "$MATCH_1") "$DATADIR/STDOUT"
diff --git a/yarns/shell_lib b/yarns/shell_lib
@@ -0,0 +1,5 @@
+set -e -u -o pipefail
+
+# Run sparsemap with the given arguments and save its stdout in
+# $DATADIR/STDOUT, where the THEN step compares it with the expected text.
+run_sparsemap() {
+    # "$@" (quoted) passes each argument through intact, spaces included.
+    sparsemap "$@" > "$DATADIR/STDOUT"
+}
diff --git a/yarns/sparsemap.yarn b/yarns/sparsemap.yarn
@@ -0,0 +1,65 @@
+sparse map tests
+================
+
+Please note that these tests depend very much on the file system: it is
+up to the file system to decide how many zeroes are required before it
+represents the hole as an actual hole rather than just a run of zeroes.
+
+At time of writing (2017) these tests pass on ext4,
+where the default block size is 4096 bytes.
+
+ SCENARIO sparsemap single data followed by single hole
+ GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec data,hole
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 8192\nHOLE 4096
+
+ SCENARIO sparsemap single hole followed by single data
+ GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec hole,data
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly HOLE 4096\nDATA 8192
+
+ SCENARIO sparsemap hole followed by data followed by hole
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole,data,hole
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly HOLE 8192\nDATA 4096\nHOLE 8192
+
+ SCENARIO tiny hole can look like data
+ GIVEN a sparse file S with a hole size 6 and data size 4096 with spec hole,data,hole
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 4108
+
+ SCENARIO sparsemap data followed by hole followed by data
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,data
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 4096\nHOLE 8192\nDATA 4096
+
+ SCENARIO sparsemap sequence of holes
+ GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec data,hole,hole,data
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 8192\nHOLE 8192\nDATA 8192
+
+ SCENARIO sparsemap sequence of holes (ending with hole)
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,hole,hole
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 4096\nHOLE 24576
+
+ SCENARIO sparsemap file with only data
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 4096
+
+ SCENARIO sparsemap file with only hole
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly HOLE 8192
+
+ SCENARIO sparsemap file with data,hole,data,hole,data
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,data,hole,data
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly DATA 4096\nHOLE 8192\nDATA 4096\nHOLE 8192\nDATA 4096
+
+ SCENARIO sparsemap file with hole,data,hole,data,hole
+ GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole,data,hole,data,hole
+ WHEN we run sparsemap on S
+ THEN stdout contains exactly HOLE 8192\nDATA 4096\nHOLE 8192\nDATA 4096\nHOLE 8192