Exporting/migrating a Joomla website to Drupal 7, with code

I was at DrupalCon 2016 in New Orleans last week and somebody encouraged me to share this info in a blog post, so here it goes.

After building the new website for Kansas Public Radio using Drupal 7, I needed a way to export all of the old articles from our Joomla website to the new website. I could have experimented with the Feeds module for Drupal or another Joomla to Drupal solution, but our installation of Joomla was sooooo old and outdated that I couldn't trust anything. I decided to write my own bit of code.

Inspecting the two separate database architectures for Drupal and Joomla showed that they were very, very different. While Joomla had all the article data in one single table, in Drupal it was spread out between different tables for different fields. Therefore, translating the data from one database to the other would have been very, very difficult. But if I saved the database data of the one Joomla table with all the article data to a CSV file, then I could use PHP and built-in Drupal functions to create the new Drupal nodes from the CSV file.

I was successful, but it wasn't without a fair amount of effort. For example, "sections" and "categories" of content (news article, trivia article, general info article, etc.) in Joomla were designated with a number that had to be mapped to their human readable name. And each content type had to be handled differently, so I wrote a separate script for each content type. They would each parse the same CSV file, and for each row in the file, if the section ID was for the content type I was looking for - (j_sectionID2string($j_sectionID) == 'news') - then it would create a new Drupal node based on the rest of the data in that row.

I also output the pairs of old and new URLs and saved them in a file. Later, I used this file to create a 404 page that would automatically forward the website visitor to the location of the new article, after a few seconds spent informing them to update their links and bookmarks.

Here is my script for migrating only our News articles:

  1. <?php
  2. $csvFile = './jos_content.csv';
  3. date_default_timezone_set('America/Chicago');
  4. 
  5. // How many matching "kpr news" rows to skip before importing. Presumably the
  6. // import was run in batches and 2500 resumes after an earlier run; set to 0 to start over.
  7. $skipFirst = 2500;
  8. 
  9. // Highest column index read from each CSV row; shorter rows are skipped.
  10. $lastColumn = 33;
  11. 
  12. // helper functions
  13. 
  14. /**
  15.  * Appends a message to the "output_log" file and prints it followed by "<br /> ".
  16.  *
  17.  * The log is written with file_put_contents() instead of a shell "echo", so a
  18.  * title containing an apostrophe or shell metacharacters is logged verbatim
  19.  * and can never be run as a command.
  20.  *
  21.  * @param string $msg Text to log; a newline is appended in the file.
  22.  */
  23. function toLogFile($msg) {
  24.     $log_file = 'output_log';
  25.     file_put_contents($log_file, $msg . "\n", FILE_APPEND | LOCK_EX);
  26.     print $msg . '<br /> ';
  27. }
  28. 
  29. // load Drupal
  30. define('DRUPAL_ROOT', '/home3/kpr/public_html');
  31. require_once DRUPAL_ROOT . '/includes/bootstrap.inc';
  32. drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
  33. 
  34. // get the csv data
  35. $file = fopen($csvFile, "r");
  36. if ($file === false) {
  37.     exit('Could not open ' . $csvFile);
  38. }
  39. 
  40. $i = 0;
  41. // fgetcsv() returns false at end of file, so testing its result ends the loop
  42. // cleanly; a feof() test would hand one last non-row to the code below.
  43. while (($vars = fgetcsv($file)) !== false) {
  44. 
  45.     // a blank line comes back as array(null); skip it and any truncated row
  46.     if (count($vars) <= $lastColumn) {
  47.         continue;
  48.     }
  49. 
  50.     // columns of the exported Joomla content table
  51.     $j_id = $vars[0];
  52.     $j_title = $vars[1];
  53.     $j_alias = $vars[2];
  54.     $j_titleAlias = $vars[3];
  55.     $j_introText = $vars[4];
  56.     $j_fullText = $vars[5];
  57.     $j_state = $vars[6];
  58.     $j_sectionID = $vars[7];
  59.     $j_mask = $vars[8];
  60.     $j_catID = $vars[9];
  61.     $j_created = $vars[10];
  62.     $j_createdBy = $vars[11];
  63.     $j_createdByAlias = $vars[12]; // NOTE(review): presumably the column after created_by; index 10 would repeat $j_created - confirm against the CSV
  64.     $j_metadata = $vars[25];
  65.     $j_mp3_1 = $vars[32];
  66.     $j_mp3_2 = $vars[33];
  67. 
  68.     // only rows with state 1 in the "news" section and the "kpr news" category
  69.     $isNews = (j_sectionID2string($j_sectionID) == 'news') && ($j_state == 1);
  70.     if (!$isNews || (j_categoryID2string($j_catID) != 'kpr news')) {
  71.         continue;
  72.     }
  73. 
  74.     $i++;
  75.     if ($i <= $skipFirst) {
  76.         continue;
  77.     }
  78. 
  79.     // new news article
  80.     toLogFile("==================================");
  81.     toLogFile(date('d F Y h:i:s A'));
  82.     toLogFile("importing ".j_categoryID2string($j_catID).": " . $j_title);
  83. 
  84.     // calculate some drupal things
  85.     $d_termID = 19; // local
  86. 
  87.     $d_body = $j_introText . $j_fullText;
  88.     // fix the stupid images path
  89.     // NOTE(review): the first replacement text starts with an extra '"'; kept as-is, presumably it closes a broken attribute in the old markup - confirm
  90.     $d_body = str_replace('src="images/', '" src="/images/', $d_body);
  91.     $d_body = str_replace('href="images/', 'href="/images/', $d_body);
  92.     $d_body = str_replace('float: left', '', $d_body);
  93.     // text is really jacked up...
  94.     $d_body = str_replace('"=""', '', $d_body);
  95.     $d_body = str_replace('&lt;', '<', $d_body);
  96.     $d_body = str_replace('&gt;', '>', $d_body);
  97.     $d_body = str_replace('<hr />', '', $d_body);
  98. 
  99.     // summary: the tag-stripped metadata when there is any, otherwise the body with its <img tags turned into <span
  100.     if (($j_metadata != "") && ($j_metadata != NULL)) {
  101.         $d_summary = strip_tags($j_metadata);
  102.     } else {
  103.         $d_summary = str_replace('<img', '<span', $d_body);
  104.     }
  105. 
  106.     // full URLs of the two audio files named in the row
  107.     $d_mp3_1 = "http://129.237.213.244:8000/mp3/" . $j_mp3_1;
  108.     $d_mp3_2 = "http://129.237.213.244:8000/mp3/" . $j_mp3_2;
  109. 
  110.     $d_userID = 1; // kpr
  111. 
  112.     $node = new stdClass();
  113.     $node->title = $j_title;
  114.     $node->type = "article";
  115.     node_object_prepare($node); // Sets some defaults. Invokes hook_prepare() and hook_node_prepare().
  116.     $node->language = LANGUAGE_NONE; // Or e.g. 'en' if locale is enabled
  117.     $node->uid = $d_userID;
  118.     $node->status = 1; //(1 or 0): published or not
  119.     $node->promote = 0; //(1 or 0): promoted to front page
  120.     $node->comment = 0; // 0 for off
  121. 
  122.     $node->body[LANGUAGE_NONE][0]['value'] = $d_body;
  123.     $node->body[LANGUAGE_NONE][0]['summary'] = $d_summary;
  124.     $node->body[LANGUAGE_NONE][0]['format'] = 'full_html';
  125. 
  126.     // Term reference (taxonomy) field
  127.     $node->field_news_section[LANGUAGE_NONE][0]['tid'] = $d_termID;
  128. 
  129.     // audio
  130.     // NOTE(review): both links are stored even when a filename column is empty - confirm that is intended
  131.     $node->npr_audio[LANGUAGE_NONE][0]['mp3'] = $d_mp3_1;
  132.     $node->npr_audio[LANGUAGE_NONE][1]['mp3'] = $d_mp3_2;
  133. 
  134.     $node = node_submit($node); // Prepare node for saving
  135.     // carry over the Joomla creation date; if it cannot be parsed, leave whatever node_submit() set
  136.     $created = strtotime($j_created);
  137.     if ($created !== false) {
  138.         $node->created = $created;
  139.     }
  140.     node_save($node);
  141. 
  142.     // old URL -> new URL pair, logged so the redirects can be built later
  143.     toLogFile("URL: /news/".$j_id."-".$j_alias." -> /node/".$node->nid);
  144. }
  145. 
  146. fclose($file);
  147. 
  148. /**
  149.  * Maps a Joomla section ID to its lowercase human-readable name.
  150.  *
  151.  * @param mixed $i Section ID; compared loosely, so a numeric string from the CSV matches.
  152.  * @return string The section name, or "who knows" for an ID not listed here.
  153.  */
  154. function j_sectionID2string($i) {
  155.     switch ($i) {
  156.         case 1:  return "news";
  157.         case 2:  return "kpr administration";
  158.         case 3:  return "kpr general info";
  159.         case 6:  return "programs";
  160.         case 8:  return "music";
  161.         case 10: return "live studio";
  162.         case 11: return "kpr webpages";
  163.         case 12: return "support";
  164.         case 13: return "kpr sidecar";
  165.         case 14: return "health";
  166.         case 15: return "alerts";
  167.         case 16: return "latest";
  168.         default: return "who knows";
  169.     }
  170. }
  171. 
  172. /**
  173.  * Maps a Joomla category ID to its lowercase human-readable name.
  174.  *
  175.  * @param mixed $i Category ID; compared loosely, so a numeric string from the CSV matches.
  176.  * @return string|null The category name, or null for an ID not listed here.
  177.  */
  178. function j_categoryID2string($i) {
  179.     switch ($i) {
  180.         case 43: return "trivia";
  181.         case 3:  return "photo of the week";
  182.         case 59: return "slideshow";
  183.         case 41: return "classical live";
  184.         case 49: return "jazz live";
  185.         case 51: return "kpr live";
  186.         case 54: return "rch shows";
  187.         case 50: return "trail mix live";
  188.         case 25: return "classical";
  189.         case 26: return "jazz";
  190.         case 27: return "retro cocktail hour";
  191.         case 28: return "trail mix";
  192.         case 39: return "kpr live studio";
  193.         case 66: return "music notes";
  194.         case 47: return "health series";
  195.         case 22: return "kpr news";
  196.         case 23: return "kpr presents";
  197.         case 24: return "commentaries";
  198.         case 34: return "statehouse news";
  199.         case 76: return "health news";
  200.         default: return null; // unmapped IDs have no name, so they never equal a category string
  201.     }
  202. }
  203. 
  204. ?>

Your mileage will vary, a lot, but I hope this helps somebody out there!!

Attachment: import.zip (9.23 MB)