fennol.training.databases

  1import numpy as np
  2from collections.abc import Iterable
  3import h5py
  4
  5import io
  6
  7try:
  8    import sqlite3
  9
 10    def convert_array(text):
 11        out = io.BytesIO(text)
 12        out.seek(0)
 13        return np.load(out)
 14    def adapt_array(arr):
 15        """
 16        http://stackoverflow.com/a/31312102/190597 (SoulNibbler)
 17        """
 18        out = io.BytesIO()
 19        np.save(out, arr)
 20        out.seek(0)
 21        return sqlite3.Binary(out.read())
 22
 23    sqlite3.register_adapter(np.ndarray, adapt_array)
 24    sqlite3.register_converter("array", convert_array)
 25except ImportError:
 26    sqlite3 = None
 27
 28
 29
 30
 31class DBDataset:
 32    def __init__(self,dbfile,table="training",select_keys=None):
 33        if sqlite3 is None:
 34            raise ImportError("sqlite3 is not available")
 35        self.con = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES)
 36        self.cur = self.con.cursor()
 37        self.table = table
 38        self.keys = [k[1] for k in self.cur.execute(f"PRAGMA table_info({self.table})")]
 39        if select_keys is not None:
 40            self.select_keys = [k for k in self.keys if k in select_keys]
 41        else:
 42            self.select_keys = self.keys
 43        self.N = self.cur.execute(f"SELECT MAX(rowid) FROM {self.table}").fetchone()[0]
 44
 45    def __len__(self):
 46        return self.N
 47    
 48    def __getitem__(self,idx):
 49        if isinstance(idx,Iterable):
 50            indices = [i for i in idx]
 51        elif isinstance(idx,slice):
 52            indices = range(*idx.indices(self.N))
 53        else:
 54            indices = [idx]
 55        
 56        indices = [str(i+1) if i>=0 else str(i+self.N+1) for i in indices]
 57        query = f"SELECT {', '.join(self.select_keys)} FROM {self.table} WHERE rowid IN ({', '.join(indices)})"
 58        if len(indices)==1:
 59            data = self.cur.execute(query).fetchone()
 60            return {k:d for k,d in zip(self.select_keys,data)}
 61        else:
 62            return [{k:d for k,d in zip(self.select_keys,row)} for row in self.cur.execute(query)]
 63
 64    def __iter__(self):
 65        for i in range(self.N):
 66            yield self[i]
 67    
 68    def __del__(self):
 69        self.con.close()
 70    
 71    def __repr__(self):
 72        return f"DBDataset({self.table})"
 73    
 74    def __str__(self):
 75        return f"DBDataset({self.table}) with {self.N} entries"
 76    
 77class H5Dataset:
 78    def __init__(self,h5file,table="training"):
 79        self.h5file = h5file
 80        self.table = table
 81        self.f = h5py.File(self.h5file,"r")
 82        self.dataset = self.f[self.table]
 83        self.N = len(self.dataset)
 84        self.keys = list(self.dataset["0"].keys())
 85    
 86    def __len__(self):
 87        return self.N
 88    
 89    def __getitem__(self,idx):
 90        if isinstance(idx,Iterable):
 91            indices = [i for i in idx]
 92        elif isinstance(idx,slice):
 93            indices = range(*idx.indices(self.N))
 94        else:
 95            indices = [idx]
 96        
 97        indices = [str(i) for i in indices]
 98        if len(indices)==1:
 99            data = self.dataset[indices[0]]
100            return {k:data[k][()] for k in self.keys}
101        else:
102            return [{k:data[k][()] for k in self.keys} for data in self.dataset[indices]]
103    
104    def __iter__(self):
105        for i in range(self.N):
106            yield self[i]
107    
108    def __del__(self):
109        self.f.close()
class DBDataset:
class DBDataset:
    """Index-addressable, read-only view of one table of an SQLite database.

    Item ``i`` (0-based) is the table row whose ``rowid`` equals ``i + 1``.
    The connection is opened with ``sqlite3.PARSE_DECLTYPES`` so that
    converters registered for a declared column type are applied on read.
    """

    def __init__(self, dbfile, table="training", select_keys=None):
        """Open ``dbfile`` and inspect ``table``.

        Args:
            dbfile: Database path, passed to ``sqlite3.connect``.
            table: Name of the table to read.
            select_keys: Optional collection of column names to return;
                names that are not columns of the table are ignored.
                ``None`` selects every column.

        Raises:
            ImportError: If the ``sqlite3`` module is not available.
        """
        if sqlite3 is None:
            raise ImportError("sqlite3 is not available")
        self.con = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES)
        self.cur = self.con.cursor()
        self.table = table
        # Field 1 of every PRAGMA table_info row is the column name.
        self.keys = [k[1] for k in self.cur.execute(f"PRAGMA table_info({self.table})")]
        if select_keys is not None:
            self.select_keys = [k for k in self.keys if k in select_keys]
        else:
            self.select_keys = self.keys
        # MAX(rowid) is NULL for an empty table; report a length of 0 then.
        max_rowid = self.cur.execute(f"SELECT MAX(rowid) FROM {self.table}").fetchone()[0]
        self.N = 0 if max_rowid is None else max_rowid

    def __len__(self):
        """Return the number of addressable entries (the largest rowid)."""
        return self.N

    @staticmethod
    def _quote(name):
        """Return ``name`` as a double-quoted SQL identifier."""
        return '"' + name.replace('"', '""') + '"'

    def _rowid(self, index):
        """Translate a 0-based (possibly negative) index into a rowid."""
        position = int(index)
        if position < 0:
            position += self.N
        if not 0 <= position < self.N:
            raise IndexError(f"index {index} out of range for {self.N} entries")
        return position + 1

    def __getitem__(self, idx):
        """Fetch one or several rows as ``{column: value}`` dictionaries.

        Args:
            idx: An integer, a slice, or an iterable of integers. Negative
                integers count from the end.

        Returns:
            A single dictionary when exactly one row is requested, otherwise
            a list of dictionaries in the order requested (repeated indices
            yield repeated entries).

        Raises:
            IndexError: If an index is out of range or its row is missing.
        """
        if isinstance(idx, slice):
            requested = range(*idx.indices(self.N))
        elif isinstance(idx, Iterable):
            requested = list(idx)
        else:
            requested = [idx]

        rowids = [self._rowid(i) for i in requested]
        if not rowids:
            return []

        # rowid is selected as well so that rows can be put back in the
        # requested order: IN (...) returns them in table order, once each.
        columns = ", ".join(["rowid", *(self._quote(k) for k in self.select_keys)])
        wanted = ", ".join(str(r) for r in sorted(set(rowids)))
        query = f"SELECT {columns} FROM {self.table} WHERE rowid IN ({wanted})"
        rows = {row[0]: row[1:] for row in self.cur.execute(query)}

        for rowid in rowids:
            if rowid not in rows:
                raise IndexError(f"no row stored for index {rowid - 1}")

        records = [dict(zip(self.select_keys, rows[rowid])) for rowid in rowids]
        return records[0] if len(records) == 1 else records

    def __iter__(self):
        """Yield every entry in index order."""
        for i in range(self.N):
            yield self[i]

    def __del__(self):
        # __init__ may have failed before the connection was created.
        con = getattr(self, "con", None)
        if con is not None:
            con.close()

    def __repr__(self):
        return f"DBDataset({self.table})"

    def __str__(self):
        return f"DBDataset({self.table}) with {self.N} entries"
DBDataset(dbfile, table='training', select_keys=None)
34    def __init__(self,dbfile,table="training",select_keys=None):
35        if sqlite3 is None:
36            raise ImportError("sqlite3 is not available")
37        self.con = sqlite3.connect(dbfile, detect_types=sqlite3.PARSE_DECLTYPES)
38        self.cur = self.con.cursor()
39        self.table = table
40        self.keys = [k[1] for k in self.cur.execute(f"PRAGMA table_info({self.table})")]
41        if select_keys is not None:
42            self.select_keys = [k for k in self.keys if k in select_keys]
43        else:
44            self.select_keys = self.keys
45        self.N = self.cur.execute(f"SELECT MAX(rowid) FROM {self.table}").fetchone()[0]
con
cur
table
keys
N
class H5Dataset:
 79class H5Dataset:
 80    def __init__(self,h5file,table="training"):
 81        self.h5file = h5file
 82        self.table = table
 83        self.f = h5py.File(self.h5file,"r")
 84        self.dataset = self.f[self.table]
 85        self.N = len(self.dataset)
 86        self.keys = list(self.dataset["0"].keys())
 87    
 88    def __len__(self):
 89        return self.N
 90    
 91    def __getitem__(self,idx):
 92        if isinstance(idx,Iterable):
 93            indices = [i for i in idx]
 94        elif isinstance(idx,slice):
 95            indices = range(*idx.indices(self.N))
 96        else:
 97            indices = [idx]
 98        
 99        indices = [str(i) for i in indices]
100        if len(indices)==1:
101            data = self.dataset[indices[0]]
102            return {k:data[k][()] for k in self.keys}
103        else:
104            return [{k:data[k][()] for k in self.keys} for data in self.dataset[indices]]
105    
106    def __iter__(self):
107        for i in range(self.N):
108            yield self[i]
109    
110    def __del__(self):
111        self.f.close()
H5Dataset(h5file, table='training')
80    def __init__(self,h5file,table="training"):
81        self.h5file = h5file
82        self.table = table
83        self.f = h5py.File(self.h5file,"r")
84        self.dataset = self.f[self.table]
85        self.N = len(self.dataset)
86        self.keys = list(self.dataset["0"].keys())
h5file
table
f
dataset
N
keys
def convert_array(text):
12    def convert_array(text):
13        out = io.BytesIO(text)
14        out.seek(0)
15        return np.load(out)
def adapt_array(arr):
16    def adapt_array(arr):
17        """
18        http://stackoverflow.com/a/31312102/190597 (SoulNibbler)
19        """
20        out = io.BytesIO()
21        np.save(out, arr)
22        out.seek(0)
23        return sqlite3.Binary(out.read())