#!/usr/bin/python

# Relational Algebra Module
# OK. It needs a better name than ra.py
#  - elb 2007-05-17

# A relation is a set of tuples.
# Meta data is stored in a matching tuple of attributes.
# Each attribute has a name and a type.
# In addition we define an optional domain.

# Ideas:
# When loading a TSV file, deduce column types
# if not specified in the header.
# Would this restrict union compatibility?
# Maybe would be better only when emitting DIF?
# Nah.  Excel at least figures it out.


class Attribute(object):
    """Models an attribute of a relation.

    The spec is a colon separated string of one of these forms:
    "name", "name: type" or "name: domain: type".  When no type is
    given it defaults to 'string'; when no domain is given it is None.

    >>> a = Attribute("one: ordinal")
    >>> a.name
    'one'
    >>> a.type
    'ordinal'
    """

    def __init__(self, theSpec):
        """Parse theSpec into name, domain and type."""
        a = theSpec.split(":")
        self.name = None
        self.domain = None
        self.type = 'string'
        if 0 < len(a):
            self.name = a[0].strip()
        if 1 < len(a):
            if 2 < len(a):
                # Three (or more) parts: name, domain, type.
                self.domain = a[1].strip()
                self.type = a[2].strip()
            else:
                # Two parts: name, type.
                self.type = a[1].strip()

    # Should also check domain and then type if still equal.
    def __cmp__(self, theOther):
        """Three-way comparison by name (only consulted by Python 2).

        None and any other false value sort before every attribute.
        """
        if theOther is None:
            return 1
        if not theOther:
            return 1
        if self.name < theOther.name:
            return -1
        if theOther.name < self.name:
            return 1
        return 0

    # Python 3 ignores __cmp__, so the same name-based ordering is also
    # exposed through the rich comparison methods.  Without __eq__ two
    # separately built attribute tuples would never compare equal there
    # and unionCompatible() would always answer False.
    def __eq__(self, theOther):
        if not isinstance(theOther, Attribute):
            return NotImplemented
        return self.name == theOther.name

    def __ne__(self, theOther):
        # Python 2 does not derive != from ==, so spell it out.
        answer = self.__eq__(theOther)
        if answer is NotImplemented:
            return answer
        return not answer

    def __lt__(self, theOther):
        if not isinstance(theOther, Attribute):
            return NotImplemented
        return self.name < theOther.name

    def __le__(self, theOther):
        if not isinstance(theOther, Attribute):
            return NotImplemented
        return self.name <= theOther.name

    def __gt__(self, theOther):
        if not isinstance(theOther, Attribute):
            return NotImplemented
        return self.name > theOther.name

    def __ge__(self, theOther):
        if not isinstance(theOther, Attribute):
            return NotImplemented
        return self.name >= theOther.name

    def __hash__(self):
        # Defining __eq__ disables the inherited hash on Python 3;
        # hash on the same key that equality uses.
        return hash(self.name)


class Relation(object):
    """A set of tuples (data) described by a tuple of Attributes."""

    def __init__(self):
        self.attributes = ()
        self.data = set()

    # This is perhaps too strict.
    # It does not allow something like SQL's "UNION CORRESPONDING".
    # Maybe we could return a map
    # of columns when they are the same columns
    # just in a different order.
    # Then the union function could use the map
    # to put all tuples in the answer in the right order?
    def unionCompatible(self, theOther):
        """Answer True when both relations have equal attribute tuples."""
        return self.attributes == theOther.attributes

    def diff(self, theOther):
        """Answer a new relation holding the tuples not in theOther.

        The answer shares this relation's attributes.  When the two
        relations are not union compatible the answer is None.
        """
        if not self.unionCompatible(theOther):
            return None
        answer = Relation()
        answer.attributes = self.attributes
        answer.data = self.data.difference(theOther.data)
        return answer

    def toTSV(self):
        """Converts this relation to an ASCII format

        Tab Separated Values (TSV) is used.  A header line of attribute
        names is printed to standard output, followed by one line per
        tuple.  Tuples come out in set iteration order.
        """
        # print(x) with a single argument means the same thing under
        # both the Python 2 print statement and the Python 3 function.
        print('\t'.join(a.name for a in self.attributes))
        for t in self.data:
            print('\t'.join(t))


def newRelationFromTSV(theInput):
    """Create a new relation from ASCII data.

    theInput is an iterable of lines of tab separated values.  The
    first line is the header; each of its fields is an Attribute spec.
    Every following line becomes one tuple of strings.
    """
    r = Relation()
    gotHeader = False
    for line in theInput:
        # Remove only the line terminator.  A plain strip() would also
        # eat leading and trailing tabs, silently dropping empty first
        # or last fields and changing the width of the tuple.
        t = line.rstrip('\r\n').split('\t')
        if not gotHeader:
            r.attributes = tuple(Attribute(x) for x in t)
            gotHeader = True
        else:
            r.data.add(tuple(t))
    return r


if __name__ == '__main__':
    import doctest
    doctest.testmod()