| Home | Trees | Indices | Help |
|
|---|
|
|
1 """GNUmed German XDT parsing objects.
2
3 This encapsulates some of the XDT data into
4 objects for easy access.
5 """
6 #==============================================================
7 __version__ = "$Revision: 1.33 $"
8 __author__ = "K.Hilbert, S.Hilbert"
9 __license__ = "GPL"
10
11 import os.path, sys, linecache, io, re as regex, time, datetime as pyDT, logging, io
12
13
14 if __name__ == '__main__':
15 sys.path.insert(0, '../../')
16 from Gnumed.pycommon import gmDateTime, gmTools
17 from Gnumed.business import gmXdtMappings, gmPerson
18
19
20 _log = logging.getLogger('gm.xdt')
21 _log.info(__version__)
22
23 #==============================================================
28 #==============================================================
def determine_xdt_encoding(filename=None, default_encoding=None):
    """Return the character set an xDT file declares for itself.

    The file is scanned line by line until the first line whose field id
    (characters 3-6) is listed in gmXdtMappings._charset_fields. The single
    character following the field id is then translated through
    gmXdtMappings._map_field2charset.

    NOTE(review): the ``def`` line was missing from the reviewed listing.
    The signature is reconstructed from the call sites
    (``determine_xdt_encoding(filename=...)``) and from the names used in
    the body -- confirm against the canonical source.

    Args:
        filename: path of the xDT file to inspect.
        default_encoding: value returned when the file declares no charset.

    Returns:
        The declared encoding, or ``default_encoding`` if none was found.

    Raises:
        KeyError: if a charset field carries a value that is missing from
            gmXdtMappings._map_field2charset (unchanged from the original).
    """
    file_encoding = None

    # Undecodable bytes are ignored on purpose: only the field id and the
    # one-character charset code are looked at here.
    # The context manager closes the file even if the mapping lookup raises.
    with io.open(filename, mode='rt', encoding='utf8', errors='ignore') as xdt_file:
        for line in xdt_file:
            field = line[3:7]
            if field not in gmXdtMappings._charset_fields:
                continue
            _log.debug('found charset field [%s] in <%s>', field, filename)
            val = line[7:8]
            file_encoding = gmXdtMappings._map_field2charset[field][val]
            _log.debug('encoding in file is "%s" (%s)', file_encoding, val)
            break

    if file_encoding is None:
        _log.debug('no encoding found in <%s>, assuming [%s]', filename, default_encoding)
        return default_encoding

    return file_encoding
49 #==============================================================
def read_person_from_xdt(filename=None, encoding=None, dob_format=None):
    """Build a gmPerson.cDTO_person from the first patient in an xDT file.

    NOTE(review): the ``def`` line was missing from the reviewed listing.
    The name and keyword names come from the commented-out test call
    ``read_person_from_xdt(patfile, dob_format=..., encoding=...)`` and from
    the body; the parameter order is an assumption -- confirm upstream.

    Args:
        filename: path of the xDT file.
        encoding: text encoding of the file; when None it is looked up with
            determine_xdt_encoding().
        dob_format: time.strptime() format of field 3103; '%d%m%Y' is used
            when None.

    Returns:
        A gmPerson.cDTO_person with firstnames, lastnames, dob, gender, zip,
        urb, street and source set (None where the file has no such field).

    Raises:
        ValueError: if field 3101 or 3102 is missing, or if the date of
            birth does not match the format.
    """
    _map_id2name = {
        '3101': 'lastnames',
        '3102': 'firstnames',
        '3103': 'dob',
        '3110': 'gender',
        '3106': 'zipurb',
        '3107': 'street',
        '3112': 'zip',
        '3113': 'urb',
        '8316': 'source'
    }

    needed_fields = (
        '3101',
        '3102'
    )

    # try to find encoding if not given
    if encoding is None:
        encoding = determine_xdt_encoding(filename=filename)

    data = {}
    with io.open(filename, mode='rt', encoding=encoding) as xdt_file:
        for line in xdt_file:
            line = line.replace('\015', '').replace('\012', '')

            # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
            field = line[3:7]
            if field not in _map_id2name:
                continue

            name = _map_id2name[field]
            if name in data:
                # a repeated field ends the scan: only the first occurrence
                # of each field is used (presumably the next patient starts here)
                break
            data[name] = line[7:]

    # Check the needed fields themselves, not just how many fields were
    # found: two unrelated fields must not pass as "enough data".
    missing = [fld for fld in needed_fields if _map_id2name[fld] not in data]
    if missing:
        raise ValueError('insufficient patient data in XDT file [%s], found only: %s' % (filename, data))

    from Gnumed.business import gmPerson
    dto = gmPerson.cDTO_person()

    dto.firstnames = data['firstnames']
    dto.lastnames = data['lastnames']

    # CAVE: different data orders are possible, so configuration may be needed
    # FIXME: detect xDT version and use default from the standard when dob_format is None
    if 'dob' in data:
        dob = time.strptime(data['dob'], gmTools.coalesce(dob_format, '%d%m%Y'))
        dto.dob = pyDT.datetime(dob.tm_year, dob.tm_mon, dob.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
    else:
        dto.dob = None

    # KeyError covers both a missing gender field and an unmapped gender code
    try:
        dto.gender = gmXdtMappings.map_gender_xdt2gm[data['gender'].lower()]
    except KeyError:
        dto.gender = None

    zipurb = data.get('zipurb')

    # A dedicated zip field (3112) wins over the ZIP parsed from "zipurb" (3106).
    dto.zip = None
    if zipurb is not None:
        zip_match = regex.match(r'\d{5}', zipurb)
        # no leading 5-digit ZIP: leave zip unset instead of raising AttributeError
        if zip_match is not None:
            dto.zip = zip_match.group()
    if 'zip' in data:
        dto.zip = data['zip']

    # Likewise a dedicated urb field (3113) wins over the remainder of "zipurb".
    dto.urb = None
    if zipurb is not None:
        dto.urb = regex.sub(r'\d{5} ', '', zipurb)
    if 'urb' in data:
        dto.urb = data['urb']

    dto.street = data.get('street')
    dto.source = data.get('source')

    return dto
150 #==============================================================
class cLDTFile(object):
    """Access to the header and tail lines of an LDT file.

    NOTE(review): the ``class`` and ``def`` lines were missing from the
    reviewed listing. The class name, ``filename=`` and the accessor names
    come from ``cLDTFile(filename=...)`` and ``property(_get_header, ...)`` /
    ``property(_get_tail, ...)``; the remaining signatures (in particular
    ``split_by_patient``) are reconstructed -- confirm against upstream.
    """

    def __init__(self, filename=None, encoding=None, override_encoding=False):
        """Remember the file name and decide which encoding to read it with.

        Args:
            filename: path of the LDT file.
            encoding: encoding to fall back to, or to enforce.
            override_encoding: when true, *encoding* is used even if the
                file declares its own.

        Raises:
            ValueError: if no usable encoding is available.
        """
        file_encoding = determine_xdt_encoding(filename=filename)
        if file_encoding is None:
            _log.warning('LDT file <%s> does not specify encoding', filename)
            if encoding is None:
                raise ValueError('no encoding specified in file <%s> or method call' % filename)

        if override_encoding:
            if encoding is None:
                raise ValueError('no encoding specified in method call for overriding encoding in file <%s>' % filename)
            self.encoding = encoding
        else:
            if file_encoding is None:
                self.encoding = encoding
            else:
                self.encoding = file_encoding

        self.filename = filename

        # caches, filled on first access of .header / .tail
        self.__header = None
        self.__tail = None
    #----------------------------------------------------------
    def _get_header(self):
        """Return (and cache) all lines before the first 8000/8202 record."""
        if self.__header is not None:
            return self.__header

        header_lines = []
        with io.open(self.filename, mode = 'rt', encoding = self.encoding) as ldt_file:
            for line in ldt_file:
                field = line[3:7]
                content = line[7:].replace('\015','').replace('\012','')
                # loop until found first LG-Bericht
                if field == '8000' and content in ['8202']:
                    break
                header_lines.append(line)

        # store only after a complete read so a failed read is not cached
        self.__header = header_lines
        return self.__header

    # the second argument is the original's one-argument placeholder setter
    header = property(_get_header, lambda x:x)
    #----------------------------------------------------------
    def _get_tail(self):
        """Return (and cache) the 8000/8221 line and every line after it."""
        if self.__tail is not None:
            return self.__tail

        tail_lines = []
        in_tail = False
        with io.open(self.filename, mode = 'rt', encoding = self.encoding) as ldt_file:
            for line in ldt_file:
                if in_tail:
                    tail_lines.append(line)
                    continue

                field = line[3:7]
                content = line[7:].replace('\015','').replace('\012','')

                # loop until found tail
                if field == '8000' and content in ['8221']:
                    in_tail = True
                    tail_lines.append(line)

        # store only after a complete read so a failed read is not cached
        self.__tail = tail_lines
        return self.__tail

    # the second argument is the original's one-argument placeholder setter
    tail = property(_get_tail, lambda x:x)
    #----------------------------------------------------------
    def split_by_patient(self, dir=None, file=None):
        """Walk the file record by record (unfinished upstream).

        NOTE(review): the statement opening ``out_file`` is commented out in
        the original, so ``out_file`` is still None when the first 8000/8202
        record is reached and the write below raises AttributeError. That
        behaviour is kept as is, because the intended output file naming is
        not visible here. *dir* and *file* are not used by the body.
        """
        with io.open(self.filename, mode = 'rt', encoding = self.encoding) as ldt_file:
            out_file = None

            in_patient = False
            for line in ldt_file:

                if in_patient:
                    out_file.write(line)
                    continue

                field = line[3:7]
                content = line[7:].replace('\015','').replace('\012','')

                # only record starts are of interest
                if field != '8000':
                    continue

                # start of LG-Bericht
                if content == '8202':
                    in_patient = True
                    if out_file is not None:
                        out_file.write(''.join(self.tail))
                        out_file.close()
                    #out_file = io.open(filename=filename_xxxx, mode=xxxx_'rU', encoding=self.encoding)
                    out_file.write(''.join(self.header))
                else:
                    in_patient = False
                    if out_file is not None:
                        out_file.write(''.join(self.tail))
                        out_file.close()

            if out_file is not None:
                if not out_file.closed:
                    out_file.write(''.join(self.tail))
                    out_file.close()
259 #==============================================================
260 # FIXME: the following *should* get wrapped in class XdtFile ...
261 #--------------------------------------------------------------
def xdt_get_pats(aFile):
    """Collect the patients listed in an xDT file.

    NOTE(review): the ``def`` line was missing from the reviewed listing;
    the function name is reconstructed and only ``aFile`` is taken from the
    body -- confirm against upstream.

    The original iterated ``fileinput.input(aFile)``, but ``fileinput`` is
    not imported by this module. The file is now read through io.open with
    the encoding it declares, as read_person_from_xdt() does.

    Args:
        aFile: path of the xDT file.

    Returns:
        dict mapping patient id (field 3000) to patient name (field 3101).
    """
    pat_ids = []
    pat_names = []
    pats = {}
    pat_id = None

    # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
    with io.open(aFile, mode = 'rt', encoding = determine_xdt_encoding(filename = aFile)) as xdt_file:
        for line in xdt_file:
            # remove trailing CR and/or LF
            line = line.replace('\015', '').replace('\012', '')
            field = line[3:7]

            # patient id
            if field == '3000':
                pat_id = line[7:]
                if pat_id not in pat_ids:
                    pat_ids.append(pat_id)
                continue

            # patient name
            if field == '3101':
                if pat_id is None:
                    # a name before any id has nothing to be filed under
                    # (the original hit an unbound variable here)
                    continue
                pat_name = line[7:]
                # NOTE(review): a name seen before is not recorded again, so
                # two ids sharing one name yield a single entry -- kept as in
                # the original, confirm this is intended
                if pat_name not in pat_names:
                    pat_names.append(pat_name)
                    pats[pat_id] = pat_name
                continue

    _log.debug("patients found: %s" % len(pat_ids))
    return pats
291 #==============================================================
def get_pat_files(aFile, ID, name, patdir = None, patlst = None):
    """Look up the files recorded for one patient in the patient list.

    NOTE(review): the ``def`` line was missing from the reviewed listing;
    the name and signature are reconstructed from the names used in the
    body -- confirm against upstream. *aFile* is not used by the body.

    Args:
        aFile: unused.
        ID: patient id, first half of the "<ID>:<name>" group key.
        name: patient name, second half of the group key.
        patdir: passed through unchanged into the result.
        patlst: object whose get(aGroup=..., anOption=...) returns the
            "files" option of a group.

    Returns:
        The two-element list ``[patdir, files]``.
    """
    _log.debug("getting files for patient [%s:%s]", ID, name)
    files = patlst.get(aGroup = "%s:%s" % (ID, name), anOption = "files")
    _log.debug("%s => %s", patdir, files)
    return [patdir, files]
297 #==============================================================
def split_xdt_file(aFile, patlst, cfg):
    """Export every not-yet-known patient record of an xDT file to its own file.

    NOTE(review): the ``def`` line was missing from the reviewed listing;
    the function name is reconstructed and the parameters are the free
    names of the body -- confirm against upstream.

    Changes against the original body:
    - ``fileinput`` and ``md5`` were used but never imported; the file is now
      read once via io.open and hashed with hashlib.
    - ``map(data_hash.update, content)`` is lazy on Python 3 and hashed
      nothing; the digest is now really computed.
    - the undefined ``ahash`` passed to add_file_to_patlst() is replaced by
      the digest just computed.
    - the last record no longer raises IndexError; it extends to end of file.
    - record content is rebuilt for every record and no longer accumulates.

    Args:
        aFile: path of the xDT file to split.
        patlst: patient list object; handed to check_for_previous_records()
            and add_file_to_patlst(), and saved with store() at the end.
        cfg: object whose get("xdt-viewer", "export-dir") returns the
            export directory.

    Returns:
        1, always (as in the original).
    """
    import hashlib

    with io.open(aFile, mode = 'rt', encoding = determine_xdt_encoding(filename = aFile)) as xdt_file:
        lines = xdt_file.readlines()

    def parse(lineno):
        """Return (field id, content) of 1-based line *lineno*, ('', '') outside the file."""
        if not 1 <= lineno <= len(lines):
            return '', ''
        # remove trailing CR and/or LF
        stripped = lines[lineno - 1].replace('\015', '').replace('\012', '')
        # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
        return stripped[3:7], stripped[7:]

    # find record starts (records start with 8000)
    record_start_lines = [
        lineno for lineno in range(1, len(lines) + 1)
        if parse(lineno)[0] == '8000'
    ]

    # loop over patient records
    for idx, startline in enumerate(record_start_lines):
        # the original looks for the patient id two lines below the record
        # start and for the name three lines below ("WHY +2 ?!?" upstream)
        field, ID = parse(startline + 2)
        if field != '3000':
            continue

        field, name = parse(startline + 3)
        if field != '3101':
            # the original silently reused the previous record's name here
            _log.warning('no name (3101) after patient id [%s] in <%s>', ID, aFile)
            name = ''

        # a record runs up to the line before the next record start
        if idx + 1 < len(record_start_lines):
            endline = record_start_lines[idx + 1]
        else:
            endline = len(lines) + 1
        _log.debug("reading from%s %s", startline, endline)
        content = lines[startline - 1:endline - 1]

        hashes = check_for_previous_records(ID, name, patlst)

        # is this new content ?
        # NOTE(review): hashes are taken over the UTF-8 form of the decoded
        # lines; digests stored by an older version that hashed raw bytes
        # may not match -- confirm whether that matters
        data_hash = hashlib.md5()
        for chunk in content:
            data_hash.update(chunk.encode('utf8'))
        digest = data_hash.hexdigest()

        if digest not in hashes:
            pat_dir = cfg.get("xdt-viewer", "export-dir")
            fname = write_xdt_pat_data(content, pat_dir)
            add_file_to_patlst(ID, name, patlst, fname, digest)

    # cleanup
    patlst.store()
    return 1
357 #==============================================================
def get_rand_fname(aDir):
    """Return a new file name (without directory part) for use in *aDir*.

    NOTE(review): the ``def`` line was missing from the reviewed listing;
    the signature is taken from the call ``get_rand_fname(aDir)``.

    The name is whatever gmTools.get_unique_filename() produces for an empty
    prefix and a ".YYYYmmdd-HHMMSS" suffix built from the current local
    time; only its last path component is returned.
    """
    tmpname = gmTools.get_unique_filename(
        prefix = '',
        suffix = time.strftime(".%Y%m%d-%H%M%S", time.localtime()),
        tmp_dir = aDir
    )
    return os.path.basename(tmpname)
362 #==============================================================
def write_xdt_pat_data(data, aDir):
    """Write the record of one patient to a new file in *aDir*.

    NOTE(review): the ``def`` line was missing from the reviewed listing;
    the signature is taken from the call ``write_xdt_pat_data(content,
    pat_dir)`` and from the names used in the body.

    Args:
        data: iterable of text lines making up the record.
        aDir: directory to create the file in.

    Returns:
        The name (without directory) of the file written.
    """
    # keep the generated name: the original returned an undefined ``fname``
    fname = get_rand_fname(aDir)
    with io.open(os.path.join(aDir, fname), mode = "wt", encoding = 'utf8') as pat_file:
        # map(pat_file.write, data) is lazy on Python 3 and wrote nothing
        pat_file.writelines(data)
    return fname
369 #==============================================================
def check_for_previous_records(ID, name, patlst):
    """Return the hashes of all files already recorded for a patient.

    NOTE(review): the ``def`` line was missing from the reviewed listing;
    the signature is taken from the call
    ``check_for_previous_records(ID,name,patlst)``.

    If the group "<ID>:<name>" is not yet in *patlst*, it is created with an
    empty "files" option first.

    Args:
        ID: patient id, first half of the group key.
        name: patient name, second half of the group key.
        patlst: patient list object offering getGroups(), get() and set().

    Returns:
        list of the hash parts of the "<file>:<hash>" entries of the group.

    Raises:
        ValueError: if an entry contains no colon at all.
    """
    anIdentity = "%s:%s" % (ID, name)

    # patient not listed yet
    if anIdentity not in patlst.getGroups():
        _log.debug("identity not yet in list" )
        patlst.set(aGroup = anIdentity, anOption = 'files', aValue = [], aComment = '')

    # file already listed ?
    file_defs = patlst.get(aGroup = anIdentity, anOption = "files")
    hashes = []
    for line in file_defs:
        # split at the LAST colon only: the file part may contain colons
        # itself (e.g. a drive letter), the hash part is taken not to
        _fname, ahash = line.rsplit(':', 1)
        hashes.append(ahash)

    return hashes
384 #==============================================================
392 #==============================================================
393 # main
394 #--------------------------------------------------------------
if __name__ == "__main__":
    # Manual test: print the header and tail lines of the LDT file named on
    # the command line.
    from Gnumed.pycommon import gmI18N, gmLog2

    if len(sys.argv) < 2:
        # a usage hint is more helpful than the IndexError raised before
        print("usage: %s <LDT file>" % sys.argv[0])
        sys.exit(1)

    root_log = logging.getLogger()
    root_log.setLevel(logging.DEBUG)
    _log = logging.getLogger('gm.xdt')

    #from Gnumed.business import gmPerson
    gmI18N.activate_locale()
    gmI18N.install_domain()
    gmDateTime.init()

    ldt = cLDTFile(filename = sys.argv[1])
    print("header:")
    for line in ldt.header:
        print(line.encode('utf8', 'replace'))
    print("tail:")
    for line in ldt.tail:
        print(line.encode('utf8', 'replace'))
414
415 # # test framework if run by itself
416 # patfile = sys.argv[1]
417 # dobformat = sys.argv[2]
418 # encoding = sys.argv[3]
419 # print "reading patient data from xDT file [%s]" % patfile
420
421 # dto = read_person_from_xdt(patfile, dob_format=dobformat, encoding=encoding)
422 # print "DTO:", dto
423 # print "dto.dob:", dto.dob, type(dto.dob)
424 # print "dto.dob.tz:", dto.dob.tzinfo
425 # print "dto.zip: %s dto.urb: %s" % (dto.zip, dto.urb)
426 # print "dto.street", dto.street
427 # searcher = gmPersonSearch.cPatientSearcher_SQL()
428 # ident = searcher.get_identities(dto=dto)[0]
429 # print ident
430 ## print ident.get_medical_age()
431
432 #==============================================================
433
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Jan 25 02:55:27 2019 | http://epydoc.sourceforge.net |