Python - merging xls files using xlrd and xlwt

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

Python - merging xls files using xlrd and xlwt

Albert-Jan Roskam
Hi,

I wrote the script below to merge xls files (created with SAVE TRANSLATE) into one multisheet xls file. I works on some files, but not on others. I pasted one of the error messages below.

I realize that this is not directly related to Spss, but I think that others might also benefit from a script like this. Any idea what is going wrong? One thing I found out that the program crashes when the ^-sign (accent circonflexe) or (more importantly) hyphens are used in the xls source files.

Cheers!!
Albert-Jan

"""
Merge all xls files in a given directory into one multisheet xls file.
The sheets get the orginal file name, without the extension.
File names should not exceed 31 characters, as this is the maximum
sheet name length
"""

import xlrd, xlwt
import glob, os.path

def merge_xls (in_dir, out_file="d:/merged_output.xls"):

    xls_files   = glob.glob(in_dir + "*.xls")
    sheet_names = []
    merged_book = xlwt.Workbook()

    [sheet_names.append(os.path.basename(v)[:-4]) for k, v in enumerate(xls_files)]
    for k, xls_file in enumerate(xls_files):
        if len (sheet_names[k]) <= 31:
            book = xlrd.open_workbook(xls_file)
            ws = merged_book.add_sheet(sheet_names[k])
            for sheetx in range(book.nsheets):
                sheet = book.sheet_by_index(sheetx)
                for rx in range(sheet.nrows):
                    for cx in range(sheet.ncols):
                         ws.write(rx, cx, sheet.cell_value(rx, cx))
        else:
            print "File name too long: <%s.xls> (maximum is 31 chars) " % (sheet_names[k])
            print "File <%s.xls> is *not* included in the merged xls file." % (sheet_names[k])
    merged_book.save(out_file)

    print "---> Merged xls file written to %s using the following source files: " % (out_file)
    for k, v in enumerate(sheet_names):
        if len(v) <= 31: print "\t", str(k+1).zfill(3), "%s.xls" % (v)

merge_xls(in_dir="d:/temp/")

*** WARNING: OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero ***
put_cell 0 1

Traceback (most recent call last):
  File "G:\GSB_Docum\Temp\arsm\generic_syntaxes\merge_xls.py", line 37, in -toplevel-
    merge_xls(in_dir="d:/temp/")
  File "G:\GSB_Docum\Temp\arsm\generic_syntaxes\merge_xls.py", line 21, in merge_xls
    book = xlrd.open_workbook(xls_file)
  File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 139, in open_workbook
    bk.get_sheets()
  File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 389, in get_sheets
    sht = self.get_sheet(sheetno)
  File "C:\Python24\lib\site-packages\xlrd\__init__.py", line 379, in get_sheet
    sh.read(self)
  File "C:\Python24\lib\site-packages\xlrd\sheet.py", line 285, in read
    self.put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[index])
  File "C:\Python24\lib\site-packages\xlrd\sheet.py", line 214, in put_cell
    self._cell_types[rowx][colx] = ctype
IndexError: list assignment index out of range

=====================
To manage your subscription to SPSSX-L, send a message to
[hidden email] (not to SPSSX-L), with no body text except the
command. To leave the list, send the command
SIGNOFF SPSSX-L
For a list of commands to manage subscriptions, send the command
INFO REFCARD
Reply | Threaded
Open this post in threaded view
|

Re: Python - merging xls files using xlrd and xlwt

Albert-Jan Roskam
Hi again,

Please ignore my previous remark about hyphens anmd ^-signs. I turns out that xls files made by spss are causing problems. Opening and re-saving them solves the problem. But that's not an ideal solution.

Any more graceful ideas?

Albert-Jan

** test data ***.
data list free / x.
begin data
1 2 33454 56 56 767 90
end data.

save translate outfile = 'd:/temp/file1.xls' /type=xls /version=8 /fieldnames /replace.
save translate outfile = 'd:/temp/file2.xls' /type=xls /version=8 /fieldnames /replace.
save translate outfile = 'd:/temp/file3.xls' /type=xls /version=8 /fieldnames /replace.

--- On Wed, 10/15/08, Albert-jan Roskam <[hidden email]> wrote:

> From: Albert-jan Roskam <[hidden email]>
> Subject: Python - merging xls files using xlrd and xlwt
> To: [hidden email]
> Date: Wednesday, October 15, 2008, 9:42 AM
> Hi,
>
> I wrote the script below to merge xls files (created with
> SAVE TRANSLATE) into one multisheet xls file. I works on
> some files, but not on others. I pasted one of the error
> messages below.
>
> I realize that this is not directly related to Spss, but I
> think that others might also benefit from a script like
> this. Any idea what is going wrong? One thing I found out
> that the program crashes when the ^-sign (accent
> circonflexe) or (more importantly) hyphens are used in the
> xls source files.
>
> Cheers!!
> Albert-Jan
>
> """
> Merge all xls files in a given directory into one
> multisheet xls file.
> The sheets get the orginal file name, without the
> extension.
> File names should not exceed 31 characters, as this is the
> maximum
> sheet name length
> """
>
> import xlrd, xlwt
> import glob, os.path
>
> def merge_xls (in_dir,
> out_file="d:/merged_output.xls"):
>
>     xls_files   = glob.glob(in_dir + "*.xls")
>     sheet_names = []
>     merged_book = xlwt.Workbook()
>
>     [sheet_names.append(os.path.basename(v)[:-4]) for k, v
> in enumerate(xls_files)]
>     for k, xls_file in enumerate(xls_files):
>         if len (sheet_names[k]) <= 31:
>             book = xlrd.open_workbook(xls_file)
>             ws = merged_book.add_sheet(sheet_names[k])
>             for sheetx in range(book.nsheets):
>                 sheet = book.sheet_by_index(sheetx)
>                 for rx in range(sheet.nrows):
>                     for cx in range(sheet.ncols):
>                          ws.write(rx, cx,
> sheet.cell_value(rx, cx))
>         else:
>             print "File name too long: <%s.xls>
> (maximum is 31 chars) " % (sheet_names[k])
>             print "File <%s.xls> is *not*
> included in the merged xls file." % (sheet_names[k])
>     merged_book.save(out_file)
>
>     print "---> Merged xls file written to %s using
> the following source files: " % (out_file)
>     for k, v in enumerate(sheet_names):
>         if len(v) <= 31: print "\t",
> str(k+1).zfill(3), "%s.xls" % (v)
>
> merge_xls(in_dir="d:/temp/")
>
> *** WARNING: OLE2 inconsistency: SSCS size is 0 but SSAT
> size is non-zero ***
> put_cell 0 1
>
> Traceback (most recent call last):
>   File
> "G:\GSB_Docum\Temp\arsm\generic_syntaxes\merge_xls.py",
> line 37, in -toplevel-
>     merge_xls(in_dir="d:/temp/")
>   File
> "G:\GSB_Docum\Temp\arsm\generic_syntaxes\merge_xls.py",
> line 21, in merge_xls
>     book = xlrd.open_workbook(xls_file)
>   File
> "C:\Python24\lib\site-packages\xlrd\__init__.py",
> line 139, in open_workbook
>     bk.get_sheets()
>   File
> "C:\Python24\lib\site-packages\xlrd\__init__.py",
> line 389, in get_sheets
>     sht = self.get_sheet(sheetno)
>   File
> "C:\Python24\lib\site-packages\xlrd\__init__.py",
> line 379, in get_sheet
>     sh.read(self)
>   File
> "C:\Python24\lib\site-packages\xlrd\sheet.py",
> line 285, in read
>     self.put_cell(rowx, colx, XL_CELL_TEXT,
> bk._sharedstrings[index])
>   File
> "C:\Python24\lib\site-packages\xlrd\sheet.py",
> line 214, in put_cell
>     self._cell_types[rowx][colx] = ctype
> IndexError: list assignment index out of range

=====================
To manage your subscription to SPSSX-L, send a message to
[hidden email] (not to SPSSX-L), with no body text except the
command. To leave the list, send the command
SIGNOFF SPSSX-L
For a list of commands to manage subscriptions, send the command
INFO REFCARD